diff --git a/.gitattributes b/.gitattributes
index 7210c02443d82361db1eb0c5b789738d44d7a318..b0f67b10ceb944f1d564b620fb0f1b9ca7a5a9e7 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -98,3 +98,71 @@ cudnn-windows-x86_64-8.9.5.30_cuda11-archive/bin/cudnn_ops_infer64_8.dll filter=
 cudnn-windows-x86_64-8.9.5.30_cuda11-archive/bin/cudnn_cnn_train64_8.dll filter=lfs diff=lfs merge=lfs -text
 cudnn-windows-x86_64-8.9.5.30_cuda11-archive/bin/cudnn_adv_train64_8.dll filter=lfs diff=lfs merge=lfs -text
 cudnn-windows-x86_64-8.9.5.30_cuda11-archive/bin/cudnn_cnn_infer64_8.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/PIL/_imagingcms.cp310-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/onnx/test/__pycache__/shape_inference_test.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/PIL/_imaging.cp310-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/onnxscript/onnx_opset/_impl/__pycache__/opset13.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/onnxscript/function_libs/torch_lib/ops/__pycache__/core.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/onnx/test/__pycache__/reference_evaluator_test.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/PIL/__pycache__/Image.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/pip/_vendor/distlib/w64.exe filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/pip/_vendor/chardet/__pycache__/johabfreq.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/onnxscript/onnx_opset/_impl/__pycache__/opset22.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/regex/__pycache__/test_regex.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/regex/_regex.cp310-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/PIL/_webp.cp310-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/regex/__pycache__/_regex_core.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/pip/_vendor/pyparsing/__pycache__/core.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/PIL/_imagingft.cp310-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/function.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/core/tests/__pycache__/test_args.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/expr.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/numbers.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/setuptools/gui-arm64.exe filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/setuptools/cli-arm64.exe filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/safetensors/_safetensors_rust.pyd filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/physics/continuum_mechanics/__pycache__/beam.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/combinatorics/__pycache__/perm_groups.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/logic/__pycache__/boolalg.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/matrices/tests/__pycache__/test_matrices.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/PIL/_avif.cp310-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/matrices/__pycache__/matrixbase.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/polys/__pycache__/polytools.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/physics/control/__pycache__/lti.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/parsing/latex/_antlr/__pycache__/latexparser.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/matrices/tests/__pycache__/test_matrixbase.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/polys/benchmarks/__pycache__/bench_solvers.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/polys/__pycache__/polyquinticconst.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/physics/quantum/tests/__pycache__/test_spin.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/polys/tests/__pycache__/test_polytools.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/printing/__pycache__/latex.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/solvers/ode/__pycache__/ode.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/solvers/ode/__pycache__/single.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/printing/pretty/tests/__pycache__/test_pretty.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/utilities/tests/__pycache__/test_wester.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/bin/fbgemm.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/bin/asmjit.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/printing/tests/__pycache__/test_latex.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/solvers/__pycache__/solveset.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/solvers/tests/__pycache__/test_solveset.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/tensor/__pycache__/tensor.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/asmjit.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/sympy/solvers/__pycache__/solvers.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/asmjit.lib filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/distributed/__pycache__/distributed_c10d.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/fx/experimental/__pycache__/symbolic_shapes.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/c10.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/cpuinfo.lib filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/cudart64_110.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_cnn64_9.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/c10_cuda.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/c10.lib filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/tokenizers/tokenizers.pyd filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/cudnn64_9.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/cufftw64_10.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/cupti64_2022.3.0.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/bin/protoc.exe filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_graph64_9.dll filter=lfs diff=lfs merge=lfs -text
+pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_engines_runtime_compiled64_9.dll filter=lfs diff=lfs merge=lfs -text
diff --git a/pythonProject/.venv/Lib/site-packages/PIL/__pycache__/Image.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/PIL/__pycache__/Image.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e915503fc62efa1f08324a466476dbb0310f7e44
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/PIL/__pycache__/Image.cpython-310.pyc
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f888691b6512ec7a6921ab20b47265b719f5d62f14e6eb710e690b02fc0f501
+size 122510
diff --git a/pythonProject/.venv/Lib/site-packages/PIL/_avif.cp310-win_amd64.pyd b/pythonProject/.venv/Lib/site-packages/PIL/_avif.cp310-win_amd64.pyd
new file mode 100644
index 0000000000000000000000000000000000000000..1616dc871e308141f00547ecff055acde1b07dbc
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/PIL/_avif.cp310-win_amd64.pyd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:478b9453ac59bf8063a1c70ce883f4b369ce02a0018c4c72a3b3a01e1b5b5200
+size 7833600
diff --git a/pythonProject/.venv/Lib/site-packages/PIL/_imaging.cp310-win_amd64.pyd b/pythonProject/.venv/Lib/site-packages/PIL/_imaging.cp310-win_amd64.pyd
new file mode 100644
index 0000000000000000000000000000000000000000..5913b2fe096ed612ba7124cc944e4041ac022a58
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/PIL/_imaging.cp310-win_amd64.pyd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93c38c01b4420f5d8e19b11e2d652b7e5ed7ec015b7995432209e13dba722b94
+size 2489344
diff --git a/pythonProject/.venv/Lib/site-packages/PIL/_imagingcms.cp310-win_amd64.pyd b/pythonProject/.venv/Lib/site-packages/PIL/_imagingcms.cp310-win_amd64.pyd
new file mode 100644
index 0000000000000000000000000000000000000000..7d20b94b3d50a8ad1f06e4908104fa3c192b2d50
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/PIL/_imagingcms.cp310-win_amd64.pyd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5159b12252444b1e104d3bec95ecd40d26e342642122ec313fb2267530d55af8
+size 267264
diff --git a/pythonProject/.venv/Lib/site-packages/PIL/_imagingft.cp310-win_amd64.pyd b/pythonProject/.venv/Lib/site-packages/PIL/_imagingft.cp310-win_amd64.pyd
new file mode 100644
index 0000000000000000000000000000000000000000..ef24e71d2b6544baa7d73fe14ecb5a642d0d7044
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/PIL/_imagingft.cp310-win_amd64.pyd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:118fcedcf66ed255a8eed7bd0e4e3922f6b72ce5fd9b3983e7fd2ff31688ba7c
+size 2010112
diff --git a/pythonProject/.venv/Lib/site-packages/PIL/_webp.cp310-win_amd64.pyd b/pythonProject/.venv/Lib/site-packages/PIL/_webp.cp310-win_amd64.pyd
new file mode 100644
index 0000000000000000000000000000000000000000..a81f2d51ca80b5478bd5d1fed8c24379322174c8
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/PIL/_webp.cp310-win_amd64.pyd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9d3ce8cbdb95ec507f96fd763ac7abd61621852effa339ec97c942159e5a810
+size 409600
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv1d_pad1/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv1d_pad1/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..eefc9ffd762a57fe82cecc90358f332042575e04
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv1d_pad1/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc2b37f24957817bcc538c5a408036e2b8748ebae583267e2649e50faba4ff4c
+size 331
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv1d_pad1size1/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv1d_pad1size1/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..30cb9478a4ddda8d089d960fd63886e282939066
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv1d_pad1size1/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42b84f52453522269a406f5ca3a69e07b3cf9fa23cce0aa3446e03cadd71734c
+size 26
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_padded/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_padded/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6bfa3f6fb1c9ad08f00fbd380b9a4666995f3f26
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_padded/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f118af2993a3d8f5a3fffe7b9856ecf6b9c65d1cb2752f6e31708aeff0f5fa1b
+size 1165
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_strided/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_strided/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..966168acdb3835b0fb91ac7fc1f08e4cf222d0d4
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_strided/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4164370dfc5438879f86bb22a7b2b162a8596e0c1855cf1cbb663418018bfd6
+size 449
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_strided/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_strided/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..858083efde005594562359ba6dfec9a8ef1886a1
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_strided/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf8c52c69446d56c0225d787d5dbd447707ef29ce1b4283b22d003413c24a0db
+size 1165
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_strided/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_strided/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6e4e119b6507302e4f59196a0e381ad582969fa5
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_strided/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84a30a3f316a3332505bf519ad6114e2eb63c87ccf6af494ae333f40b71c081f
+size 141
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_with_multiplier/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_with_multiplier/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..cde815b92a11f9c867054209e558d57276196b6d
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_with_multiplier/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a74ce65ac65a324b29d2a52fe5d4ec35ae1bb7e2cb3c85a469439e077f10fd81
+size 609
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_with_multiplier/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_with_multiplier/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..7cfee0cbbbef70900c408031a4cbe99aaa3c0dab
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_depthwise_with_multiplier/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77f1a0f0fc22f9410897264f208675688a4516fd70862e947e397afa3eb8cb1c
+size 1037
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_dilated/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_dilated/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..bb7a8f8b430890de42efeff7a5e80132ec9b4734
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_dilated/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3384e8834947a897ab76ef63702b8a3630ae115396d21f44ab8bb7e93ee18f04
+size 513
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_dilated/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_dilated/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0d24712220fd5444fbda473de7161ffdcd0a228c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_dilated/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47a18a7b21299a3077fab44727b9a78c6ad345f2821497e0b8b8c1127662187f
+size 1549
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_dilated/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_dilated/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cc600bd6fb7652dcc5cf29b8b0744f1111a34d8e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_dilated/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c34da36bd4318d8183c3ad3a69693bfe84abe65502a12b66cad6655667b57afc
+size 157
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..16ed9d1b02514da006e059fa3c261936ed28b973
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba1fa246bcd26c5eabf7a26ae0d4c69ca57bdba1c10e7bfee69863a778a21092
+size 601
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ee226f55c028d687bc7b40baa3e2c2d0b3c32e24
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:013cc774e28717b58b8c45922664ae5bfeafda7132848da2c51076afc95b7efb
+size 973
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d87d9b8423232cc82638a45e5846be8709cabd2c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c4092923a2c39422200270d42b6e667dc12b9ad74c355e2bee6d327d1312f91
+size 781
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups_thnn/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups_thnn/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..b1c754ee80b0c0cff778a38f1a1ac75a58671d0e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups_thnn/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e31fc9569a5c9b5c41d84b3464a752bb9e084f75f551f37aa45d8b4ddb115590
+size 601
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups_thnn/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups_thnn/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..950d1a241a1efef5e622f038ab235bd7b569e354
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups_thnn/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b6ad13ef4861ac3f32a9515a45070d9e4756dfb1d32ee2fa3aea6c9a7006f43
+size 973
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups_thnn/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups_thnn/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cd98a781b7faf0d6b68757537b32680ec5103b01
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_groups_thnn/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7d1a23c2ff27ae1ea300e25f9f5873b736b7e9b6a9f587aca2c87e558728f8d
+size 781
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_no_bias/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_no_bias/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..aa021fc0a496bbc0c931ab1fa79f012d545838e3
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_no_bias/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66c2ee5a70dae7f8ba9acd68e7c44e7fa35e05ff36247abc27283a64ad233714
+size 546
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_no_bias/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_no_bias/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..3c5f771274f244008f1a3f624d7ebbcf2901f4a1
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_no_bias/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e408d720feb472f84076f2a552fd4c7f476767b60e1128b21a1607e00df3ccc7
+size 733
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_no_bias/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_no_bias/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d850e81209bbbc5f5421d94f6e8ac6bfbccca690
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_no_bias/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c7f991938036d018e9d27dc36afa8ad5e92d82f31d75c733c35549275022ef8
+size 525
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_padding/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_padding/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..dd4239fa090ed66fa808fccd7a9f048ade8b988b
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_padding/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed1ddb4594fbaf1242ea597fa5aa47f4bab10bac4b3172df8e331b392caef0d5
+size 737
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_padding/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_padding/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a602ab7de57f370188dad4663add07ef4b65744c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_padding/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b2f97b0f64f9072a269f649f58ee247bce3eec5552f78b84402236ff1cdd34a
+size 877
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_padding/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_padding/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..635c6a05db14dbe83d93f9bc89184e865c82a53c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_padding/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61907fedef6ea80f04475e6ad507a71dfac8517b767d09cf80e4db2d7f0217e5
+size 301
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_strided/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_strided/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..c0bacff326bfd5b8796c7cb1aee64a73b79bb5b7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_strided/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a067ef80bf9ad828224c1841869ecfa96ce56f1f9b6ac884b1c7a050a9978e2
+size 737
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_strided/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_strided/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..972e1e9cd4d474072a33795a2fad6b3f91bc88a2
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_strided/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c895e7ec7d22fd3d56c69f8299491e989da8c9ef5bddf1d5b5348deb957450d
+size 877
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_strided/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_strided/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..df0f4d80a6e2d485255b679cd5b5cb40c5914b74
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv2d_strided/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fce395275b74ee506c0f73a6c8c8e10850ddf6241e330d7d8255440b894e21a5
+size 141
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..01f4ef26ec61759f2854d98b03d60bc421880a4f
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7a2643307ad773209a837bd20de6bb03e82d9f08d65459ab78abb90e5bdbd80
+size 1481
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f7a78f0642914732645c9cde51468935799d462e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e087f4b71a027f4d92c726fe4cab154a69dfc8f782c89525d2f94372385bff76
+size 1455
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ce761baf740391242dc26d6e3813ed20f7bca52f
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bba44bc357e6527b24b6ad4f7a61a14afca2ca18f32dba514b409c7b1113703
+size 271
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..fbeb60b38543b2f14d1cb4e9746f1ea5bad982b3
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:809a11e78f1a126417ee4130f837a02f5f25e961bd3d98911b89b20066492cb0
+size 713
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b9de1156666e1ac5180f295bc2bbf690b70d957e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2053ec05ecba39974770157fc581757607aa4ee8f064f02e90f15b98977ec9a
+size 3015
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..04b7970deb14245f22e10659fbefe1737e9996d0
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6555e6ecf6ab107be30f415170bc83c34b024ad8f0360f4bcf27c1d9814b6992
+size 879
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated_strided/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated_strided/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..63877f57c886504536b340f7f116aeddf3f9501b
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated_strided/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:305efe8553722b1a5239808f3ad32a8f83cf2ee999bc4c8d2e928a747d1567c5
+size 713
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated_strided/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated_strided/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..bf646caae345b0a32987efe2047a31bfc5b2d408
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated_strided/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12416934f234b95f8f12d66aaf99c9e032f1864f782a2a7c28da731c42f452ae
+size 3015
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated_strided/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated_strided/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..e706b351621fb0a2b7c363d4195ae7e11baa4061
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_dilated_strided/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:528c5c02a23d1c73844452a46167f6ec6f41dbcf8c15a90102d200a18850881f
+size 271
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_groups/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_groups/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1b26be46e416edae998f9582b521050c21d0967f
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_groups/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfd202874c3886a639b27313c83f768131e15b583b550e96567610a0b49ad36a
+size 1633
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_groups/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_groups/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..edd584c97ea0ee62d01abb7ed2c4fe55148e9678
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_groups/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e93da49609ce30ff9c41138bb25c1d984711966bf8639b14dd382ae4de46fe1
+size 591
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_no_bias/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_no_bias/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..abcea49d7ef49d4e6371098359162e838ba19a55
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_no_bias/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33e21992067ff81af56288b042fbadd941455dfb54145e54baa70d9e2aa7fd39
+size 1434
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_no_bias/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_no_bias/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0a25a10681603c014111edaac5d27966754dd566
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_no_bias/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcc1d2a91d6b17f5d46e513fe076e322735f10ce35ee29de8b4e6ecb570f21e0
+size 1455
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_no_bias/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_no_bias/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..4e1c467c9daf996034dd74c440e1d9472f2236a7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_no_bias/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10b9d466ee93b9bdcdb7e49ba809ee67f6c5d7aeeda56aaac43f13136aec6b79
+size 271
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..220ba83b98c37d8241bbc295583ea12180ab2ed5
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1196f59fa6d825c88795c64f033574f168bbab51dcc6f2570b2e5a3ce7f29d0e
+size 713
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..2009353dce4b3f2a9a835089d63afdf837d5d3d1
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d29385285f62a97069578354a79e7588d6250a8783e43d7c5d18b971e9f5d07
+size 3015
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0a3da6fee730efe984d70d2688468b147cd28dfd
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:401a6bb16f11ce656b0282d5334550f43d4338166a87d62696ea0db830253ca5
+size 271
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride_padding/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride_padding/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..4045279ae635e47a5e74872dc2e8e648d3b582f2
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride_padding/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c33d6d095c325e667858235d912affc8c0876b4b5537187afcb98118fc8be2f
+size 713
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride_padding/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride_padding/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..db58b80fbc5c262243af030abe37486c43ad2511
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride_padding/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4acc121a8c18908fdc7cfb24d60f61115b2eadeacdfc9da014927f68f8862b08
+size 3015
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride_padding/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride_padding/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b7fa44164cacc95a1220e71b2299f139f1de338a
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Conv3d_stride_padding/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e68b433ba94ebbbd9ca0e7485d44ee3a414df92d3a764a6ca4c849ebde32b56
+size 879
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..ada985df8e0540806bb484446da9fc74aed90652
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba26b02adad99f5aa585a2958df0fdecc783e70a563f8f8181fb205aa00702a1
+size 772
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..2c0bebf450ec8ea29bfda77fdf969ae3e148d182
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d38576c232a6479be9297b240e0c97fbd10f6ec3da87f8faa673e0767e66e70c
+size 517
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b3dd5629ab98d77fce642e9b040b3877713ea366
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dacc56b95f1562eb365f9d52856de44465d707f62049a3f8a456cf915c56beda
+size 3853
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d_no_bias/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d_no_bias/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..94811f4d66105f36a2c90d0e79992e3d309d0997
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d_no_bias/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e29cc361f4ea0ce6e934a2d00e73683b849fe150b300b9219e3ea0fca31b6574
+size 725
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d_no_bias/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d_no_bias/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..24738049c379d46304d052aa5989878927703bd5
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d_no_bias/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:703ada80743b852fb1b4dd5f308196c8c5c9d8d7ab6709ff2520fc08d6521802
+size 517
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d_no_bias/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d_no_bias/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b0ef72510778abad814385cc99e21848f70d82f1
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ConvTranspose2d_no_bias/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:200d0bdabdb5e4eefac4cefad56fe29d6241215d9d2b84af29bea0ba3efa92dd
+size 3853
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ELU/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ELU/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..190b08ccd45e9a51c6f4226ea380230adb678f6c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ELU/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d927976b1d0c2339216bc1dadfd71cd9b40b5cc4cb214c008289ab1a1f716da2
+size 120
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ELU/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ELU/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d5bf261381f11b2a2f1ef2d69f81eabbc920901e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ELU/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de9dd7efe99168b2fdb2cb1cb1dafd987718dfe1e7f9bd1b6c027ae2a2a15b2a
+size 130
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..9715208d1501bb8fa00c32620a023f2c9e72c7c9
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff4a3e2cffc38cfc1b056d03c5aa79069ae3ee37651e35f851f6e62a7e1d67d4
+size 188
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..86b10f76760aee04a3f4a85e8515e22dbf26fe1d
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff4adcebe31cb6232f5b2f0ce8d49656c9b86d5c4b0032c58ce86c4eff3120cc
+size 40
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..763f31de2406a7e1060e0beee2a371675f637c41
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f583e01e392d647095d12fbd8ff141d9b2e685b765220a35175283432a76512
+size 58
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding_sparse/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding_sparse/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..09cb81a3dee9b33b47744b47743adcfe470f2294
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding_sparse/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:179bd4f6f4d062aaee42795d277f1048c30b3ae2d02440df9b0cb65e50cede16
+size 188
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding_sparse/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding_sparse/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..86b10f76760aee04a3f4a85e8515e22dbf26fe1d
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding_sparse/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff4adcebe31cb6232f5b2f0ce8d49656c9b86d5c4b0032c58ce86c4eff3120cc
+size 40
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding_sparse/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding_sparse/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..67b867488cdac777f595008525596e564b7b56c6
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Embedding_sparse/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a685f01e2bcac20824b531da7b7e953394261bc76bc975f648eb4d01552cb2e3
+size 58
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..470a07af9c98c241e3854afad97df29da260562d
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d09a105a897b5ebbde7a0473fda31780c22a8c941a83c125fab44b2b0476aef
+size 156
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ea9cc928359ebf76cb581823feb6dbdbe9fc5d38
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b7c06408c186f018dd09a2943e49808caa7224050ec46eac04e4e411501279d
+size 128
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5c7dceaa1666c38b6f3b866204ca043cacc8a2b2
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:285cdbdf161e15e03dc4798a94d848e805ed7d80b3f7d38df035731b5a228661
+size 68
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU_dim/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU_dim/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1c6484f396095c593a249c2a465d8e1711f8378a
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU_dim/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ebcb7132d4e866c31d0ebb82659dc0e86cd6c9552a24909d82c86e6a8fa0ec2
+size 155
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU_dim/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU_dim/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c83b67fb5178cb3cab803106bec4e153ade8ebae
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_GLU_dim/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1041a6ce9d670bdd5038ca4edc15cfbac48ac18481a5aebae0b48e90ecc2247f
+size 431
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..584bf629b24a6c00d9300d91bf7a140abde9044b
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48aef0448f20fa8d66bd23630d2f6eb57b441bc01359733676bd5518e5b707b1
+size 130
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c589832d33ed84dade8f7e4134ca1f04d536bd2e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abed185b01eda5dcfff4359a9fd21ba24855d69f6d8bbdcc3289e4149a49ef5d
+size 130
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU_with_negval/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU_with_negval/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..ae0dc5cbac8515f62bac38827552ba8b3f80109b
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU_with_negval/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34ce09526f79bd9bdd333013d85e43dbfab55f27e6b5c0aca1c445e887290be3
+size 126
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU_with_negval/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU_with_negval/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..adf526be2e89c58397ca002ceb1bbcf27fdabaab
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU_with_negval/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:763617c7449e8d37bd8b9c0715c284a3669bb38cdfa4c6dc44bb54c629024891
+size 130
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU_with_negval/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU_with_negval/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..12f2efe6266439600421fd1c63b800708a55e958
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LeakyReLU_with_negval/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:042bc3bf628343a8fe2ed916cf965c4acffa66037255806c4c4a58e6cfd2c584
+size 130
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..fc060dc83ff19ad2bf1c52f88055d48a231f9d40
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11504362c0fa099cec3e80034c27aa1c99966ab493be7a92524b9c06a37c61e6
+size 169
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f5d536a7e957140caabf6d5272349f4f88378fa0
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63631630f831239f084c258dbfd475a647d1f88e352402aa220773c6f4e3a314
+size 137
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear_no_bias/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear_no_bias/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e485330dc41211a1b74ec9f8566a08cceb6dfb75
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear_no_bias/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:071ce49b698635b090487697ece6d1822cd025d0902f4a39f9711bb9bd433e8e
+size 492
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear_no_bias/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear_no_bias/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..4b1be3c4c09968451464bd87109fa31fc1596786
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear_no_bias/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5d4d4fd48dc94b5da890bfcd5f72dda17f46b3322c3e90452bbd1e18ede7806
+size 169
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear_no_bias/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear_no_bias/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..eb63f24dfdad5e47125c5680eca678befdf76763
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Linear_no_bias/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18ba13745d2637359625293b121d5d5037748c9b4661166ff75e45bbee30ad8b
+size 137
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LogSoftmax/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LogSoftmax/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1d3bb1c738a2d8bccecd50ce724fddc01b96fc83
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LogSoftmax/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ece1bf87cacddd80dff54ddf083671484d6c9dbc2b919138a83cad94e9cb9b47
+size 115
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LogSoftmax/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LogSoftmax/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..db0c65b2872e2779770d8dc407cd733f4ce5ff5f
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LogSoftmax/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:688bc46f385aeb3556a0e3320d09345f4f85ef271598e4d040f0cfcc22bf7dd3
+size 809
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LogSoftmax/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LogSoftmax/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f319d1dd5ab58b06ea34d090d3ee857a9dedad4b
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_LogSoftmax/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91c2df10525d3a4e3a627247d8029f0fd6bd51ea0cdf94d928e2d9dc7d564dd5
+size 809
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1ee22256c16bffab6ce70d4eb25a330a88dccb0c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:204b5c894d67495d151596eb21fe7927a95fa59ebfdd55cebe0c8f6dd4ad9ebe
+size 160
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..0aa585241ddde87572a360963ffdab52701d8578
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:346b8f9cab3a8087256e4b782112f5cfb5af6cf9bc0341aed9f5fe49a35578f1
+size 331
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f538042c19e08377e62544aa97ef731b224a84ab
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efdb220cd29ef3ae83d1f0a1f715a009ab7f225b71be6ee7f82fbc87a1cadaa6
+size 90
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1ee22256c16bffab6ce70d4eb25a330a88dccb0c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:204b5c894d67495d151596eb21fe7927a95fa59ebfdd55cebe0c8f6dd4ad9ebe
+size 160
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6b3979c7297edbf27f05267c9b01ee401dbe759d
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53a7e3d0a090e2c820183814b6ab0fafcd5bded86ec520df21802e20adead1a7
+size 331
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..636b0b893d999f588775e85adaacdf8a7cf4ea08
--- /dev/null
+++
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65eaff0f88c328d9422c03d998a1972e4bb7b0c5db2ba9f6103f0a775c3900a +size 90 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride_padding_dilation/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride_padding_dilation/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..528a2d3d8b23a27a82963f19ccc96266658dfb93 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride_padding_dilation/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e55328f92cc0ad554cc9bda789196d71901e7bee980457860871ccdd18f3556 +size 165 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride_padding_dilation/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride_padding_dilation/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..590979a501f7943c3d1cc63e7ff6191caa65e3c3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride_padding_dilation/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a26f50e594a377fdd68f0975d39311133a3137288662ab212c849de7c87a23 +size 880014 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride_padding_dilation/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride_padding_dilation/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a76007056d497edeaffd7cc34cf6bc80fb0d330 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool1d_stride_padding_dilation/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4e831f4939253f7beda74b90d9a5a6ae245f4da5405c0e337d3936325658002 +size 87298 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..92086ee58d142b17e6115d5d423a0b64ccc572fe --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaac34265d1d2597205177609276ec2973c4b5bc483c1c414a6b9653b4d60660 +size 176 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..8777cab21460764145515981e46d147677611010 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:1b3e2276af48d653ea007a533aeb0843f85f4de66f704c34a57c1d9c589bba68 +size 601 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..09c1d300854363e22982b0245ad69a60c6b525e3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e4aeab61341803bd821a566fdbbfa836768f2fbecfb400c6ec2822f17fb0c98 +size 205 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d_stride_padding_dilation/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d_stride_padding_dilation/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..79aec42e088a6c1a4adca001b40726856ada5ef8 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d_stride_padding_dilation/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d74ed26c95a5990f39335113f29258d199d1adb300ba8fa112292048c15a0f +size 180 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d_stride_padding_dilation/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d_stride_padding_dilation/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..a9e215b4219a1b0ca108a9d9aebe56f1a948a7fe --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d_stride_padding_dilation/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28444dcd7d1d3a1d9d6564dfd1d561ddfb6e6abd0dd5a4d81fd917252b888df5 +size 4000017 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d_stride_padding_dilation/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d_stride_padding_dilation/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..ca8ea6f4ecce464ae14fe84cd41e1396f929b145 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool2d_stride_padding_dilation/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa681dc60c527639be817a75ebf586fdb1909fa536f5745998bccd37cb258e2b +size 4313 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..95175072891413c0c9d7d811089d3102d17a07ef --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124548c54d86bddfb4dce2ef87343074ff60338f0895ad78a4d9acc921e54c9f +size 192 diff --git 
a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..a61138a3193311c4594f6f45f2720e3306446322 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:749e4d5145052c80652a003c088c6cd94d427537a515033a19a39572375f5c7f +size 3015 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..3b3c6165a8812a7be55b6cf8cb3381292a3bf463 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3314986cae66c39f04c2e733ea89a691549cc4b05b6921f18672286986c8a2f1 +size 207 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..95175072891413c0c9d7d811089d3102d17a07ef --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124548c54d86bddfb4dce2ef87343074ff60338f0895ad78a4d9acc921e54c9f +size 192 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..2fbe7614fecd78d93369305ec8cde8cfc51e04ba --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3167f333970b64ada34b7c10cc0e394d1d1f3b7117822b868dea32824af7e27 +size 3015 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride_padding/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride_padding/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9cc48230a8a3759bd9a82f8806ea1adfffc3a8a9 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride_padding/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ed68a3e9628f4ea8a587ca130bbae437bf13fc27d7f335d2cc91c990fe3ab5 +size 192 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride_padding/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride_padding/test_data_set_0/input_0.pb new 
file mode 100644 index 0000000000000000000000000000000000000000..7939339253985cd3c55fb9a1ce0b361fbf522905 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride_padding/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5662f7cf66ba3e37036da9f3ae0dd32d937ae2c88ab1754045a97a8d20d1c4 +size 3015 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride_padding/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride_padding/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..8d91969e52d3b272a846f1e93f07f37d356ea6ca --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_MaxPool3d_stride_padding/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41633821dca76774de04458996b59c9a22ee47317cf86503b088d4f1145a7913 +size 663 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2f954dca227e754438864cfbfea354abe7b6ebd6 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e268ef13688982483bcd14d2d18d0cc268b7adbc43ccf3405c5ea5c36916d010 +size 140 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..33f33ddf95ef9c5fa6d24b47b213bc25511e00ad --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7700a3e5d3a8d176f42d6a0511ad363178cd08129e93531bee9f65a2a9fea816 +size 106 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..282a523078136b47238e164455bd400339583e38 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59555fc3b94de1b50ef7596326e382ac5d1a02c01578eed5c4c58b2423cf7f6e +size 106 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d_multiparam/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d_multiparam/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ad19f8c1b1847e042882c1cd0eb423fd83c2d798 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d_multiparam/model.onnx @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:22f63cb6948493484995b46db7927f233ad0b080eb6d8bc67519878ff9008328 +size 148 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d_multiparam/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d_multiparam/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..c8e3bd0525cb50ad21a70c48b75a8a08325d755a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d_multiparam/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a94825136b7d4e5186d8f38308daf297de886423094679f6b3690528fb6276e8 +size 106 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d_multiparam/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d_multiparam/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..e2065cd67cadb0d66117aa1d7c9f402fc1450c84 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_1d_multiparam/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:913c1d0ee63425ac255c3b50c8a6daf1a95ff3b3cebcb5be1f4b2f722259211b +size 106 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..874a3181a257a09d58ad0c792f12dc2ac8890329 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2df9a380dd056908b0e54e9acbbfee504d7282f71a85a0017dd06ea6fc3d4bb +size 148 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..c2188da5d33a18d8f57f5803465ec79d9a7b6f33 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f59244e7311babd88052926097b2138c5c1337d53ae9de072c0671cba31c0f6 +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..013c555122d8cef0b94d30de2d2a089ced3658a3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bf0e9e8fba2136db930362524a9da8de89894c0b0593c26ff712f1633b37eda +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d_multiparam/model.onnx 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d_multiparam/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1bd17587ef855109f8436bdb8a37c845cf6aa225 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d_multiparam/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e920bed8f828c5116321ba6717996fbb8f901972a257cf0920c8d8ef7d264f +size 157 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d_multiparam/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d_multiparam/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..c9fae5c4c0ad181ddc071216db87c85986b26a27 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d_multiparam/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3552b9cf7041fbb7cccd2a440157bd289f3fb235bafe8b5e43e945b34ed83260 +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d_multiparam/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d_multiparam/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..a42f418000e77925370a649a60d1d9e74ce4e68a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_2d_multiparam/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e523e4454fe79323d2a966b01cffece48e6716719771e6bb8baae2841fef5876 +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..bcf37ddc75a59636851e2f80bac1204f3184924a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f66df2d06fae1d6fb9a5d871adc3c5e54e0cec69b33d88692456b4f821fe34 +size 157 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..fc3a2381a0dc0e6fedb894e5cf02f0675f79ce75 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9233487531057dbd7aa0c47fdb58271f4fb1d72b9f436d2bd09270b44b7ee27a +size 2895 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..dd029aba7a3f59f9835a7e08db7d1ab2547e0e1e --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a57c84d6941172a76425132a5c827000db3ddc99c1a6eef7b0f8ab3c94eb71 +size 2895 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d_multiparam/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d_multiparam/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7a4397ca6310de0b557884151ee886f1361f93c8 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d_multiparam/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e88e381630d135d3a09aec1d79ec2dccd6506db4d8b8fb39d02c724b6d510c85 +size 165 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d_multiparam/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d_multiparam/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..552cbd9a43aa199afbfbbf5388ba32df52eb6574 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d_multiparam/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec22db847811f6385febe7d2c7da155433e4c81dea549c91a90555b2333c4e70 +size 2895 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d_multiparam/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d_multiparam/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..44737e4623ea335086d708e1a5828d7271b4aa01 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PReLU_3d_multiparam/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6faa503f34b7f5bae1833fc7919b10688535aef4593fe38d825dfa9f6bc688a0 +size 2895 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PixelShuffle/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PixelShuffle/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..54783b740bedebc98bfb0ad916f85c05290f47d4 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PixelShuffle/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0912a957ccd84ac2a9e268b08e592152b9e59fb8e9fb55b6852abe362881ca0 +size 331 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PixelShuffle/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PixelShuffle/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..d49e23035d390c2623098c8448cf4dbbd3f423e7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PixelShuffle/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:039efa92fb46bb159c808f3bd96e7e73cf463015758904edafa673b2285f25fe 
+size 589 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PixelShuffle/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PixelShuffle/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..9e080deb7361d56344cdaf918753c7a28cd21e42 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PixelShuffle/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9b075273812c8a180fb7992b6576430aba85a0cd550235fe98388c9679666f +size 589 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PoissonNLLLLoss_no_reduce/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PoissonNLLLLoss_no_reduce/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..08bbc88d168f87c3e6de44c20162c4c9447b255e --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PoissonNLLLLoss_no_reduce/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b425a3f9ae177a598eef2d9228a86604acad45af60cd61f4e0beb8071a180463 +size 569 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PoissonNLLLLoss_no_reduce/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PoissonNLLLLoss_no_reduce/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..b9f34887906fc2a439b5a92c4acfdd1bd418f3d6 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PoissonNLLLLoss_no_reduce/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a343bc36cc0031cceb7205f2dc06f50203ea94c9dc276c77701acf309eb2f9 +size 409 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PoissonNLLLLoss_no_reduce/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PoissonNLLLLoss_no_reduce/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..8f98f2b2d7eb247d212f4893e627331b5fbd8b72 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_PoissonNLLLLoss_no_reduce/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b6832c53d1883d5318679b2b0513d973ac64ffe85e19cb0224f4de6cc64880b +size 409 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReLU/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReLU/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..37c026d529f212fac16aab8bbc0333c0afd106f3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReLU/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e786d816f737440d4b8ec7faf2b156a686b3c88d1ba3fe15f273b316791114a6 +size 112 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReLU/test_data_set_0/input_0.pb 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReLU/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..f444d87b5d260a95e8b973e48f1ae7669953973e --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReLU/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:132da94694a056f661029fdf5dd24b215022764a6287fed74cc6acd9a53fc776 +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReLU/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReLU/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..1dce3e12363d55254cfef4b853e39a4791301b96 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReLU/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a752e31026e3dc21edcdcddd22bd396c1ac3fc678bcd639dd31355b2d020fa92 +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReflectionPad2d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReflectionPad2d/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..06b152a68146e68d625418693e0835d0445a6f44 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReflectionPad2d/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad18f5806ab0b52956775e04e63a53b7c8b881747e82c1e4b4e147d9579c9696 +size 159 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReflectionPad2d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReflectionPad2d/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..49f2282aeb5adfe4c674631d971414c109c2144f --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReflectionPad2d/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19dac24e73ebf6545563f5cf44378c0e0efa6d60cac5cce66cba6b6e1173f350 +size 1549 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReflectionPad2d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReflectionPad2d/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..848dcff82a63f83fb17097d07ecc4aa2d8d458f4 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReflectionPad2d/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308b4a7a7af1b1f7a7d10a9b025dcf231f4c4e1d97f14b15b3355a6266d318d6 +size 3973 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReplicationPad2d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReplicationPad2d/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..bfccabf8f0b319823230cf1d2d842d0916029fe9 --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReplicationPad2d/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896ae776de6beb2f711fdb4959547fb2bbd6777a87448f98cdf30b80721eff1e +size 156 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReplicationPad2d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReplicationPad2d/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..d733ab197a0182c9dd36a9c5a3e184e873b0d492 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReplicationPad2d/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aefa6b08007cee626acb97362b8edd5cacfa95788083a564d0f09234821ff1f8 +size 397 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReplicationPad2d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReplicationPad2d/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..2b743aa970fb676e1fd2315213c58e532177cf2f --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ReplicationPad2d/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a91b296da1b4a9f23e4f89327510b0de91dd01bcfdd81bd54b681df889b29db +size 1861 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_SELU/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_SELU/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e7de91ac29cf5d9d7c5d3a0b235dc550d5be5b35 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_SELU/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b617d81ad8bd304d82a67f14d371e667a2397c82bbdfbbb45950bf3bf651f4 +size 104 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_SELU/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_SELU/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..6dba94f03864e9b8d6acc14900874201e0c54fe4 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_SELU/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b32d5dd4969a4f2b577efe825fa679abcc005acd9352355358d1b519df42fe +size 130 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_SELU/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_SELU/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..637b29a8cc4a88e4057d7d5f9e1d55a1ed04e3ce --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_SELU/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd4be99621f7969b8b771c507132e6e3b38ec6e167912944946ce971b6dc4e59 +size 130 diff --git 
a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Sigmoid/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Sigmoid/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e0c35c6eb0da9b01597f0b22c9475a3e08b800e4 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Sigmoid/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afe8d7bd3bb6381d6ea07484737c1e5017e2a56fa3f9ba261c44e888b4677598 +size 115 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Sigmoid/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Sigmoid/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..79355fa0a59f521e876460dde05af74ceb602882 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Sigmoid/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46aa18a89508f501717f27d0c8e91b217251dd913b6f51f8c0f5435ea07df88 +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Sigmoid/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Sigmoid/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..f95e10fd7195e68e0dd1edbf2a9418b8362d53cd --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Sigmoid/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdab9df3faa0cbc9b22a8b72e36a2e0c6bdcf0ba81f4fd30ce2335593f5476fd +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmax/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmax/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3b4882343cd681c919c53f351d227706adf9756f --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmax/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c614f8f96bf15a31fb1be0db2b48f5dbceb554667eb140d5616daabcf546b4 +size 112 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmax/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmax/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..cc69d1ac8ea72ec312ff8c6a1e1a95647632b588 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmax/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70405c37203e50eecfbc93f4e413453e0df83b46fdc6e236a081f41ee8db2420 +size 809 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmin/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmin/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f26b1dc6f95640c4ae070dedc1deb615ee650bd3 --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmin/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e4cdc2d57f499598a955d50ecb691cf6ae8a9955458e4a1a8b03417b092deac +size 125 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmin/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmin/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a3f726d3991179e30a142873071114718e01fb9 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmin/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e84be45e0b0cd6a7aacb8c57beb3668f31604087d48352c00ef2ae8eb921bad1 +size 809 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmin/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmin/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..fadff17f453c108449b31e9bab8ad4f71617d2a2 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softmin/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74cd19ceebb00658c2602979830b73e9dca5ef16f03ead3a3922696d9cf7ae5d +size 809 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softplus/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softplus/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf4e5e3ce77301eee148ef3ea2a92bd19b3f5c81 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softplus/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447096bdb649da9b8972c92b932a7e50eed52578096dbb5f9e0bf9d26f9706b4 +size 809 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softplus/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softplus/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..1037d1cdb15bcefb61693c47ecd66a25cb0d0e8b --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softplus/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fcdc154264dce020c25e3bf2a39cb1429777ef3baa48dacc0c9fb01c1efdb3e +size 809 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softsign/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softsign/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..bf7b06f7f6098f49909e56b34509003f6d5f28cd --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softsign/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb02fe36f0865ac12df5e609eb1709d0a319b34b173ccb2be31af6e9720f08e +size 191 diff --git 
a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softsign/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softsign/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..6bb2a4506d1b6bdc8bc8167e6addd9f269266aff --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softsign/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a047ba2fd2937900154cafa04270dc203b92c8336903dda642f1c6a4a4af895 +size 130 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softsign/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softsign/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..e628874cf7598f5affe8c888d7bc643ac93d4bf2 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Softsign/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:760b176c53929f4298b5b55efc6e1d27ef98c2d286639d26e4bda1732f3c9602 +size 130 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Tanh/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Tanh/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2362279e85daa7f86266b94110ac03a45393ccea --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Tanh/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b4446d8a440d02a4f87144513aff79dad3a842733fba7a538d6a03245ef698 +size 112 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Tanh/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Tanh/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..661216331e2853a2df3695ad3630d514d6a5d6d2 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Tanh/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ec984ba2ef678c9d00cfbdf73202a3a55e413eced7cee59070dacf12289fbf +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Tanh/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Tanh/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..a73d641c5b2886c911a31c445da15067ae54eef6 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_Tanh/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:185a818f8306ecf022ba3c4c47abb3713a90183f18ba6c05ef94b3aafbac49da +size 493 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ZeroPad2d/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ZeroPad2d/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1ddf94f6832a10e2847a970dcd15b8ed2e49f3ac --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ZeroPad2d/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a28c21f42f5ddf77e5dfa53a30faf3cb8eec7362ba6aa5ec029a271afe6e112
+size 177
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ZeroPad2d/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ZeroPad2d/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..30c5b6f2178f19cdfb8e82adfe6080e78a67eab6
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ZeroPad2d/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b6c64a4b1818021e42b7e82879cfd98c40cc1a7a8e3a8184adffde50ed5894b
+size 397
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ZeroPad2d/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ZeroPad2d/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c91b069d7344989429ef90c84f3b40f4baa6f623
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_ZeroPad2d/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9a1aad510e25c9a08f34e47005b46d2bc9391b399e8abf38dea0d517f1956e1
+size 1861
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_dim3/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_dim3/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..188e77b60133af62b8a876fde6ced8fa54060b7a
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_dim3/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8956b4c1ef7789f5d2a7525aa55463f079baf262e95971764103a68dc059f6bd
+size 131
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_dim3/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_dim3/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..352bf93be84565198861ff81def2456714953d4a
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_dim3/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfda7a4dce021837dc31ebcc6f7588e91f672952d2ae676c3e7c02cc1fd8473d
+size 493
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_dim3/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_dim3/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..98ebebd52d3c9e4c7f6a3f9cda2a015235d71e19
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_dim3/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4d5d36fdf64fde6c3659f1076f36dd996d58484441fd738e78837ea16571dc6
+size 493
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_lastdim/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_lastdim/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..ba6c46d047357c8a8163d4e40a8c079e563b5b6c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_lastdim/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9bd6ce0cee451394b4e1a85261ae9b3f1c4103186311c260dcbe0dcb70c93bc
+size 126
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_lastdim/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_lastdim/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..29aee34b1fa9b4b1f27520a734530b81402ec25f
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_lastdim/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcce89e5c9b6619441e2d099c2e3d6b40e08b45f28e07825862efc8b248382c6
+size 1034
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_lastdim/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_lastdim/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..abe9ffb9079ce1d128cebe78a63e29f3086045b4
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_log_softmax_lastdim/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c2a84582dd41fa5b70157866a88543cd53c049d10868fc2db9f61e9695437d5
+size 1034
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_functional_dim3/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_functional_dim3/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..540e8a8d2da555f7133acecc11d78bd237c2d538
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_functional_dim3/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d53ed2da67c0dd5c93c3f52eff14752993a493a7d1f201f595186a7bbee408a7
+size 493
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_functional_dim3/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_functional_dim3/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9237f982ea91f2e15b861d46d09b31aa7aad519b
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_functional_dim3/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b7ad7133004927055c8f23bb99f5bff7aae2537d47d046586b1c93c3ceb23a5
+size 493
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_lastdim/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_lastdim/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..2f92ab8cd63ec100d177c368c95f154bfbe122fd
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_lastdim/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:016597f084a51e47e37c126bfbc70e3c2548569943ad943cd66061d5883fdc44
+size 114
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_lastdim/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_lastdim/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..12455e3818e3319c9ef75b794e189ee97b8bc485
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_lastdim/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:208fdce8d77c1d6e90d92f306fdf6337afffa138e43aad0b56b7ed03dd8ad097
+size 1034
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_lastdim/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_lastdim/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a370794472f683ce8c0edfbe80595d20211ff8bd
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-converted/test_softmax_lastdim/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fc64e1eda6470d3fe97ea75ec17265932ba6540c0180e8eb60a52c23170c5f6
+size 1034
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..91e1448600c0d053192a7b70c165e61a8df6c096
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e32a28e969e4ba81388720c78b9caade5df1b3bdcf3dd1890439fcb6c0ad6e68
+size 146
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..bad6fe24b10445b62f29e13f42466ac0d74cad77
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59e2931766b480b4a015213b44fbbbb2e0bc05a90b239270c512207180513291
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5dddd2281d603b9ab5de5e0151fbb69e65559537
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a527a60ef96fa797a5bc467ecd880dd5401ca79c3730d021d4d0aeb48403f14
+size 30
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6778b2a3379c3ee588ed47ed856edb4904e70916
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_broadcast/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc058fb1e2a988b860e716cbbf86eb0fae8f1ae8d50a8c0058fcb9c477fb30db
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..2e887cdd7c8a78489614e6b496fa7daf4b0323b3
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43fdd40bdc58814f99d45083aa8c1782d4181394c533776ffe7bf0e67016dc63
+size 151
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5df9e2264b43c460d8cbb8d2df3dbd219367b423
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7416634ba7f4619abe3c09aeb5fd8a57273921cf2591b6917aa02374b288bfd8
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d30fb08bb24f16de956843d677850823e179357b
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:266de418b18c2df08b763b6589dcedad152d55ccf81da1ca6296688edec56c7a
+size 24
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9122ac0202473e7ff807e08151c7ad73596739fe
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_broadcast/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ee170f791f30828f693312cc85c4ee956ced35fa57bbb93aa48b76229de021f
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..91e1448600c0d053192a7b70c165e61a8df6c096
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e32a28e969e4ba81388720c78b9caade5df1b3bdcf3dd1890439fcb6c0ad6e68
+size 146
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..464a64b66f3e99e333dd99e10053f7fb44a2b37a
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b5c895101ac1b6b962895fd3d44559481d523518b6cac2af282d69b7bfd9440
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8b13b4c6ef8af8e844fe51175684f97ef3a0b6a2
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:987fdd6549bd2b74748e0be3a9db16bdedaa20e9a1e46bbca9542f780d224322
+size 30
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c90c6ba3957b5b0d265127c490b3dc146bd90e75
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_right_broadcast/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29a0c88ed17c7494ea566c5b9bb6e5f9e36219933edf3e0da9d857f0c157ea68
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..716152d09462b770353a92370abb629dcd8dffef
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b4bba28c4211265077b668c2ad11dd4b3fc4617583c4e36657d21e14d735374
+size 151
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9e290b343d66e409a1c96dc97c3a72d315cbc61c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e0c7d043be3cc02aff6e7a5e5ad53a8f813fd48270e67f4582edff1ddd00db2
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..910d35966bc40f5d3ff41033e90478b4193abfe8
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ef595df5829db9e0a9c19389bde46c1d72f2c8f1eec62343c81d5ec56f86a44
+size 32
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..652e0a823071d9209577ab4de83c1adaaeb59ea6
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_add_size1_singleton_broadcast/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e84618a16650ab9be91650b9ba3d9b8cd42cd3ce14f9fc8025d6f9ccb8a98597
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addconstant/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addconstant/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..6e2b494bdde5ac3975d3280543aa22899d2545e9
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addconstant/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c6b202f12d5c44e81f989d3e5d4e281f80efb455406e0e437168b1d0229cc83
+size 158
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addconstant/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addconstant/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..52b9af9ab38845da3696d36e6758638514f49fa2
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addconstant/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69e02108c80938d21289814b113306c289669c4594be207ed2bd39c780e3fd01
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addconstant/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addconstant/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cb4685fbf32128ce4041ae8e32d2c59834126034
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addconstant/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dea4b3411238113bccb144d75350bdd5a52aa885afcf6d7588f711105d6334be
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..084afde18d6a1659db8bb0c21da2fe05db495610
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a645ec71f16a780ebdb6ec3eb3ee47d79628f0ce7f6bbfafc1fa14175136923c
+size 245
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5b97a8c02c998130588bd88c7f8341f32f2df552
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17c1c8c6e32f2f3758a8d054d563a14aef4135bf5ff2c136361e0b49c8e40afe
+size 32
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..025406d6e8df1eed661503a07240a91a2ea81891
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c117964fb86ed70be9cd56ddcb8390dd12c79881a5796b9d8d7b4dcd0c4ddf3f
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/input_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/input_2.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6f56fee09b1cd5d7d3ab42d913e35f2fb3a9e167
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/input_2.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8923ded003eacab08169bdc97d04efb9758ac585b58eadf030cd2775a5477a94
+size 22
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6ec50c952ea3c123d6eb1a71770fd7692924abc5
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_addmm/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d90d0df5bfc55fafc4962d34ec902bd330eedc558228b9a24eda89a89249e806
+size 40
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_basic/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_basic/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..8686b32153fb0dcd47696f899f0c818b27493b7e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_basic/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66cb29e392752d93ef4ba8c0ce514ec8b139f91eabe226fabb4946c1d82bae5b
+size 10
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_basic/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_basic/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..a163309cd16447e997e6df11389a759282dbbb93
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_basic/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a21f96395a4dd2d6754cdb726ce7bd0ff82191e3c7d0ac84369d43434f2d54e
+size 10
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_basic/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_basic/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..9e244860a4a990f355861478ac5cc194cf93cfd9
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_basic/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64541217698e5e14a5a4b5774a25e9953071e298418d177addb84872f4b4b1c8
+size 10
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_chunk/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_chunk/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..8ff12a6230ea03a5f8be090a4d4e07e451ab6ea7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_chunk/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3efe57ac9ab9c8322733b62c35b61217175b640c564fcb386b5c8ae6e5fa956c
+size 138
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_chunk/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_chunk/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..3fb5c7fed7954534ded7401b770606eafeec5f1c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_chunk/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdc4fa56de648e23d09ffb95337d3ebd1aac299bdbd041ed487400f997def1d5
+size 14
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_chunk/test_data_set_0/output_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_chunk/test_data_set_0/output_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..76d076f955c240176257636b2fb8d87c19244a6c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_chunk/test_data_set_0/output_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:452735224a917452cc0660c443b273982eb681306689a9df27effd974e5ebf71
+size 10
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_clip/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_clip/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..adba92e2c561a96a139cc3747611bc97ae219bc4
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_clip/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:778fc5195b9d5db7f9e9885194a2a8efbe3d5edb55ac8a874ed1e78a19adc06b
+size 126
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_clip/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_clip/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d5319a675cc614adf964cc1ff2c2dc7c51349998
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_clip/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:944e1a296dddb9b6c0941e2b70351c96d5972084abe077803752d69e3706970f
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_clip/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_clip/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..4ce7f0573dcda36ae8f2f1f867b51d833dca0fdd
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_clip/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1a92a67c157f8728da953f66a7a384018a3b59b60d572ab5c15f61cb9036182
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..7f3cf1aaf21912bc69462c4600ea24c661c743c3
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd04dc7208cbcbd10f5fd505f895674ef906676431ccef960900957ac823e46c
+size 135
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5b97a8c02c998130588bd88c7f8341f32f2df552
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17c1c8c6e32f2f3758a8d054d563a14aef4135bf5ff2c136361e0b49c8e40afe
+size 32
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..209062c26aec5da257ae2442fe1396f06a69472e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c76b841a3f25cbc02eeaf38d07975bdc1d991ec7f395eea57235af701b2b9bad
+size 32
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..edb693129301c6dd4c2c01401091758a14145232
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_concat2/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a600c30de787f3909fd9789081bdfe75c70aca9e81ece22d1f334891d7aca96
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_conv/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_conv/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1196a966d7bd5cecd414e1dae10b3b27afc82df0
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_conv/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8686672d9ed2b539b5c9a670d93ce007c569314f6d74e33b4b10118a3f33d656
+size 7746
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_conv/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_conv/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d60009db6f493f99ae5f9ffd9625e9e26a2b7fbd
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_conv/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52f2215b35016c85c17a5ad4d471c71ebedbf49140a881af0f7f80f023ebeb70
+size 2560015
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_conv/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_conv/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..4649f17a8f1d08f24c9bd78dad8d31fc808a1bef
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_conv/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb558f63a7fb8e9193c98103f279861c389d171edeab5e0d4d82a5926824d5ea
+size 1896974
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_convtranspose/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_convtranspose/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..c481bedc29c3cff138a2575c372be2ad6587547d
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_convtranspose/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57fd5707733f1e7473322d19963781d318f42c8cef22041c30b93d04e4ace86e
+size 617
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_convtranspose/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_convtranspose/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..b895c0fd1d091a7216a4cbe7ad82e76d1bd6e8ba
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_convtranspose/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4457fa4f9c95d9c71d11e0c4a4a397d5091ae99c34253e44dc0e3972dd332517
+size 493
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_convtranspose/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_convtranspose/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..54ea5ef434898396a7179e29984c3ff7c71569f6
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_convtranspose/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddeaf41fe4dbafbe9821c4d3ef49b081b738f1bf50a0cc5324a4c98207b9bae3
+size 4333
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_exp/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_exp/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..b856d52e50112cf4665a56aa57e3780e93b1ccdf
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_exp/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60b5c2af9b0856877d830ecb3b440325bb96fbac1958e2e0ae2a5b0940a770a5
+size 95
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_exp/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_exp/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f1980b88d464cdf17babf9f068943e728f8a72a3
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_exp/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10dd6c7aeb8cff55012458d7a33eb1957d4505ea5ff0dd01e66ec14e458cf155
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_flatten/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_flatten/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..0ec7a4895d9b2a3d2782d8a59f8f370ecd575720
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_flatten/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebaea8a9d56651b8ba0454f29dad1e1e4725ae8a54549b6abfa13cf93aa39ad0
+size 120
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_flatten/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_flatten/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..62563c46e546efed55746645afafc2a0bd75d0f7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_flatten/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc3e62692125d1db6c9003ffc87c131af57d753699bc19ff0f0366b2662045f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_flatten/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_flatten/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6e19184370981214007c6fa14604ed47496d674f
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_flatten/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c14b75588dbe28a94c3a0802f704b9f6f645f05fd138009d237b3b826a2dc3f5
+size 104
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_index/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_index/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..456f5b9dfdc5a6f83dbafdea12aa484e15de3276
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_index/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:275b72eadf19cf0c94848cfc478ce673ad2b4aa51ad3f264b7ed1358b96e2a6f
+size 165
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_index/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_index/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..45a94b35c9ec6bc5579fcd35a87bbd6d8b3ab638
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_index/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:872e582cf3d9df93eedcfa10415a86b3116912f38e7137b5dbbb540a239ffb24
+size 12
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_index/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_index/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..60cd32bc685a60421e8e6f0efa54f2bba1edfdff
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_index/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dba4178c03e74ec137429a898dc3fe3e4485551fbc8a66133e4430cbda9d1293
+size 10
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..daa1cd9bc0aeb5c1e4801f69d959e9d0493c8552
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4499da62783819a04e944de2a37b071ce0b13901f0448fc526406f5db6715687
+size 119
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d5319a675cc614adf964cc1ff2c2dc7c51349998
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:944e1a296dddb9b6c0941e2b70351c96d5972084abe077803752d69e3706970f
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..1b86459025188256d506e4c40844b55ef9c2aa22
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1f8008ba09ff2b9fabdfb05edbf198a5a9488bb6c9fe82b8844765338641a98
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..36f5537e1131869de37b4e2000ef0c872ad25a9c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_max/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a62cba1b93cf0b7aa55ebfb457acc71b4cef800ae5b51df4b4f5464bd955ada3
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_maxpool/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_maxpool/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cd868192c2889a0855d865a6152af2675562c63e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_maxpool/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69afe403394b677ae6dc29d19bd3fb06164a3391c550900a65672926a8d32a16
+size 64012
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_maxpool/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_maxpool/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..86f7f041b473a54db0c5756feec3d52e12219a7c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_maxpool/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a0bb0864762a1813781eee67b5346088f3540f875a588e6c792dda89d3575ae
+size 30732
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1b2e1402d5daafb2a3d306acc91691a15046fb3b
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ebc29a395593453c5a039e44a64435f33067ad3e1fd6070138b40e98c93132f
+size 119
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d5319a675cc614adf964cc1ff2c2dc7c51349998
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:944e1a296dddb9b6c0941e2b70351c96d5972084abe077803752d69e3706970f
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..1b86459025188256d506e4c40844b55ef9c2aa22
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1f8008ba09ff2b9fabdfb05edbf198a5a9488bb6c9fe82b8844765338641a98
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c595a8b588c6fb1a109f9944f3b7a4742dc851b6
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_min/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9273b65cbda18dee810af9e30bb28c0060b834d993b0a1ddb7ef768d2f48e5cc
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e20d6714100844f4ab6a9c6bef66fa5faf57444c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd53be4a44cdf737123b37620cfec0ed4c47e8d5042cf727bdeb10d83706fdb8
+size 214
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..5b97a8c02c998130588bd88c7f8341f32f2df552
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17c1c8c6e32f2f3758a8d054d563a14aef4135bf5ff2c136361e0b49c8e40afe
+size 32
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..025406d6e8df1eed661503a07240a91a2ea81891
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c117964fb86ed70be9cd56ddcb8390dd12c79881a5796b9d8d7b4dcd0c4ddf3f
+size 56
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d5af018c3c78b82bbc7939e2f5eabe04c6250619
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_mm/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea9959b921bc5c15c8671f3af66a38088b7fbe4d7f61a99fb0a35ea820b187bb
+size 40
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_non_float_params/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_non_float_params/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..dd19453546fd86e01b8498c47239c8049acdbbc2
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_non_float_params/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:485c66a0c1b5abdf83d03a351f0761e86b9aa44e821787d4bc5145ccbfda5465
+size 181
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_non_float_params/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_non_float_params/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f200c714beafaf9f23f5cf5fc1fa8cc58e3d5972
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_non_float_params/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83fab6fd56acbb4a476a49755e4a9e1aa6779b15e2ca483eb435aac7cfd5433d
+size 40
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pad/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pad/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..94059172d5cd70da22b43923eeefc7c181febfa4
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pad/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8176991317b9ac1f5e9ee1a18a89231102086b8f74a44a051602b1a0e1294aa0
+size 159
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pad/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pad/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..f208ca6d59f979646b2310f92c404636d89f53d3
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pad/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0f9f2fab2c3f55cc6d50af5534ddf730ab2bd41cc81326fd4308cc36b88384a
+size 44
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pad/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pad/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..cc245536a07c50d3d78a2e5d48f50b33c1b17f2e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pad/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e49eb16e63c84f9dbb5422e9ed58042fa28f20c10a2a4ad2bedc17387d965a6a
+size 120
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_params/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_params/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..5f2c44c7587aa717916504e0b920329d47d1db29
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_params/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4618f50b075490691d11a75161905d979de1b3e12f20f892255039993cb4e0eb
+size 209
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_params/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_params/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..68eaacef9ba89b958006899bba09af456f6281fc
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_params/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e839fc56e801da9b602fddfe6b9c65f1fffa28da6ab3721d38d9fa580d2b952
+size 24
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_params/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_params/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..418789b6eceb218badd7cc411875bfce183b0b38
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_params/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b56a8748b6ca45a8a4550531a8e494218b1ca8dca85dd8a288c769c98a30947c
+size 24
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_permute2/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_permute2/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..90f060074568233a028c48f26ed7a7393d9aa662
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_permute2/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0aac91f4e4b02c2fefcbefbc18ebbad361c2a5ecee02dd80bba13bcf107ad35
+size 157
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_permute2/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_permute2/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..76b2d1906a6d0fa34ca0213cb264a1bcc3f92083
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_permute2/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ff34379c6ece81e777fd0c7de45516f9260ef7de66f3cf5a8b262e9ac905123
+size 20
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_permute2/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_permute2/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..76b2d1906a6d0fa34ca0213cb264a1bcc3f92083
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_permute2/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ff34379c6ece81e777fd0c7de45516f9260ef7de66f3cf5a8b262e9ac905123
+size 20
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..ff046e07c320afcfe1e4cd48c46de668c71713c8
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33b5cd9141807d9e44da79058d65bfed25ef2bd5076f725270e6828b1f10bb21
+size 143
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..62563c46e546efed55746645afafc2a0bd75d0f7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc3e62692125d1db6c9003ffc87c131af57d753699bc19ff0f0366b2662045f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/test_data_set_0/input_1.pb
new file mode 100644
index 0000000000000000000000000000000000000000..4e1aa81748ce990f96ca93d89e2f230a37bb2c32
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d0b7e8ef88dce493e1bdbe5a310e53860b2f0a4825bb53adcd4c1c209ecfc3f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..c5ec3cfdfcdffa731e5dbb26a247fa6ae0cac7d7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_pow/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c437b01b140337678ffc22d8dc9261750cebe621a4333c72d15ae9e40444843
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_mean/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_mean/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..9e460748aea0701925bd6f02d160d866d1d5fd6c
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_mean/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b520425abc7a0320ff2475963531e99f2dfbc469b3d2db5a19e70a07a945a40
+size 144
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_mean/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_mean/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..62563c46e546efed55746645afafc2a0bd75d0f7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_mean/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc3e62692125d1db6c9003ffc87c131af57d753699bc19ff0f0366b2662045f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_mean_keepdim/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_mean_keepdim/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..62563c46e546efed55746645afafc2a0bd75d0f7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_mean_keepdim/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc3e62692125d1db6c9003ffc87c131af57d753699bc19ff0f0366b2662045f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..62563c46e546efed55746645afafc2a0bd75d0f7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc3e62692125d1db6c9003ffc87c131af57d753699bc19ff0f0366b2662045f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum_keepdim/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum_keepdim/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e40da729e542a416e414df4532acd8fa25fac4e3
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum_keepdim/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fadbf7176846be0b2930f0b54ced7bc668a45b21dac12b889a9935b187c3818
+size 147
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum_keepdim/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum_keepdim/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..62563c46e546efed55746645afafc2a0bd75d0f7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum_keepdim/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc3e62692125d1db6c9003ffc87c131af57d753699bc19ff0f0366b2662045f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum_keepdim/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum_keepdim/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..6ad3314834c407bbf8256f89d1ab16c4ec4ee8b1
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_reduced_sum_keepdim/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4cbd3a0c1b05baa454e208bb8524fc7191f61b14083dfa4c0579ab98c383217
+size 44
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..b7f71e4dc0d93326e2b205af367a4313eca20f06
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45024e66e34011dc16e8f942925c4a94686d676da617c902a68bbc1402255b25
+size 183
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..62563c46e546efed55746645afafc2a0bd75d0f7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc3e62692125d1db6c9003ffc87c131af57d753699bc19ff0f0366b2662045f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..d92347cc0c5b3c9aacb407ac8326044254caf753
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9894e282d63106e64582f3a3b0ce39d628595ace004650f6ebcf8c2e3863597
+size 2317
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat_dim_overflow/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat_dim_overflow/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..29f04edaa11ab47c993ccd9d237a3f2bea1f6c7e
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat_dim_overflow/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:988af83b725e7dfdc78cd70b3ed46074da3cb2acb78a3fc4991c2d934fbf2e29
+size 262
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat_dim_overflow/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat_dim_overflow/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..ea35ea3a9b3dbf33f46552dba9ab490ce1a781f3
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat_dim_overflow/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3de7968d997ab3ad7841ac292a54f369ad8cfea0ca92e3bc0416b3faa7e4f2cb
+size 16
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat_dim_overflow/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat_dim_overflow/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..357658d364f4468d78fdb24f9f1622e0d161cc52
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_repeat_dim_overflow/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dca7d969fdad14155aa62508cebabb3ae64dc8111c7e6eb367c9044423100df
+size 205
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_selu/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_selu/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..cb0b674b354f88fa42923691dd0f22baba511e65
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_selu/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de6a8a5c3b792b00480f9a35d470ed332a082ed591d78ffda0f1ad38dc83b9ac
+size 112
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_selu/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_selu/test_data_set_0/input_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..62563c46e546efed55746645afafc2a0bd75d0f7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_selu/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc3e62692125d1db6c9003ffc87c131af57d753699bc19ff0f0366b2662045f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_selu/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_selu/test_data_set_0/output_0.pb
new file mode 100644
index 0000000000000000000000000000000000000000..827deada870fb735f206ef0ec9ed5f2634c14ba7
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_selu/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81dffb4efb1f2bac33245d5cb56728500afbfc40228bc780af5030d3ca80ad4f
+size 108
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_sqrt/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_sqrt/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..0a4a32c3846b33aa674de0c473965e152d3d0cfb
--- /dev/null
+++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_sqrt/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73e9a3f0a7f0bd21baca809908bd515953c2e1812c16af66f47ed4439fd879cf
+size 96
diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_sqrt/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_sqrt/test_data_set_0/input_0.pb
new file mode 100644
index
0000000000000000000000000000000000000000..d5319a675cc614adf964cc1ff2c2dc7c51349998 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_sqrt/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:944e1a296dddb9b6c0941e2b70351c96d5972084abe077803752d69e3706970f +size 56 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_sqrt/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_sqrt/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..9ff7678877f03fc96b9b1a413ea8a2fc51990704 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_sqrt/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7761216d6701e85081eb26a93b78e31c03bcdae4d695f7bba8af7db9f7187a2 +size 56 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..463328928325eca38a4bd8b4c9f55dd60fb7b603 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dcba4239490d9808912d8dbe82fbd4b54e983ddc145656199425c2ef406c819 +size 291 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..c9697345162bc101f56d4c61ebb6c1266fde0f0d --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b56f70c52f4016af37eb818083d99fed9aa506ceacf55c47ef2c6ca75e293c4 +size 81934 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..b169a94d55178adfd75d331f857e2f30cbe7ccb8 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eac75cf668adad2964c7c94dcedc4167fbed2a78282abe4c1f6649b5f42d564 +size 81934 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..066b587d6d3b3859ae434b6c630f8a7d256897fd --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6391e658fd3db2a43c0fceca282b8dcda7030054d7bd3f5ddaaad3d5442a085b +size 188 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..ccfa1510698e1263f2dcdcf2a3ccf80ff4fdacb4 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83817f048d5d2fd670eca44d6bcda1eeaadcda286b68fce894432b6922373eae +size 10 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..76d076f955c240176257636b2fb8d87c19244a6c --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452735224a917452cc0660c443b273982eb681306689a9df27effd974e5ebf71 +size 10 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/input_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..9bf70d1f497256a66ad77c3d89a1d9d800a9ff17 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/input_2.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:166431a361ddfddda5930282813b9b1d1ef87c81aa4837abc94adef19cf65db0 +size 10 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/output_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/output_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..0beada33227dfa5621342eac834ce9f2041b6f3f --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/output_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb0d646caa7bd9abd9bfddd072f9a3b0fd6175e97074c10466413837605b2a7 +size 10 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/output_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/output_2.pb new file mode 100644 index 
0000000000000000000000000000000000000000..0d557463d4196e4804f6fb86921df141c945c28a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_symbolic_override_nested/test_data_set_0/output_2.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62099674f5d96ff6a75cd79ab1b036019e6fa5de6d281003c6c3aec12064e963 +size 10 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_view/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_view/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d87d8e2d7d9f27de1d1ffee4ce784f3a9975b67d --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_view/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3ed32ba38fbfaabb3b76f7f3df0c174976fabeff635a851bc0bc4c21ac339b +size 108 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_view/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_view/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..60cd32bc685a60421e8e6f0efa54f2bba1edfdff --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_view/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba4178c03e74ec137429a898dc3fe3e4485551fbc8a66133e4430cbda9d1293 +size 10 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_view/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_view/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..45a94b35c9ec6bc5579fcd35a87bbd6d8b3ab638 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/pytorch-operator/test_operator_view/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872e582cf3d9df93eedcfa10415a86b3116912f38e7137b5dbbb540a239ffb24 +size 12 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..07c1ff9c466ff1be9fe7db4cb8cb6f6e18669e13 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4635688307af248c7e17f34246b07bf2edf0cfc278de58b08bfe155d9e734f73 +size 132 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..419a378c8accc12e94e38a000aab441990512a14 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:15e69c4f4256a5137c278fb44972fabb9205abcaeaa733624e0e63e7e537f685 +size 25 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a318017282ed9c0a76d5724572e92a1866df3ce --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa41683544eb3ce75c5000445edcfc63426f4d8c9a40a0d636822586575175ed +size 29 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..8a8e2612363da0351b10689aa011f44256cfb521 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model1/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3abcb456e21b5a8c5042514dbaf7c72b1b12b6c5d4c7df031ed5752fed25ffc1 +size 25 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5897cf28d086d968719fff2d270414d9787064b1 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f3017c74d3b1a92f1186001d18b1d11c395252cb5f660b1ff2c721ab90fdd2 +size 132 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..419a378c8accc12e94e38a000aab441990512a14 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e69c4f4256a5137c278fb44972fabb9205abcaeaa733624e0e63e7e537f685 +size 25 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..a529a91413e0f904c1a4abd6994b5c7fcf935c6a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed123191690318d9a5da9f1e6dcca495d5a419a82313cff7807be3404ae5ef2 +size 29 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/test_data_set_0/output_0.pb new 
file mode 100644 index 0000000000000000000000000000000000000000..58c27a912dedcb4c167e9c40a4a45bcaabbc8b14 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model2/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fafe638847302c7b45977acee1ad86f58e9d3aaae81d3601e28e4dc35489b0 +size 49 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e1f851d1d2c121155880813910cdf2858bcb5a86 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b65bb3af8af5658e446e632afbaa04c986878c27e0d96984ce61c13912b8683 +size 132 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..419a378c8accc12e94e38a000aab441990512a14 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e69c4f4256a5137c278fb44972fabb9205abcaeaa733624e0e63e7e537f685 +size 25 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..88e84a67b3ea272b9a74eea248698e85358005d1 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74264f2ebbc5be664f882a860363533dca70e275aa5d3a01a50ec9fe361cecd +size 37 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..6c035da6a7ce297f3b5b09ebf9f38435bcb944cb --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model3/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f7875fc93279927da4b9bdedcd67af21c58b3dab9eaccb3408ad9c011b86327 +size 121 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9834583a415a15b4a996931da67f3efe2c693780 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8b0adea1c6c1d7eb38fb06160870556279abdbdf1244395860cac35ced0fc83 +size 
136 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..419a378c8accc12e94e38a000aab441990512a14 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e69c4f4256a5137c278fb44972fabb9205abcaeaa733624e0e63e7e537f685 +size 25 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..74f87f06fc2f88c687e3e5c8512872277e3ee474 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f3b935cf7f69287d48ac5f1c6c3c88ecf6419db8ec7b9142a8d062ba3347359 +size 45 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..e7099dc2759aeee8ec4d7e7585a9185dce29fe0d --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_expand_shape_model4/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9e33fae9dc3761385e9e860a8d0915d27436f8e0ce586bf35daebe4d18ae78 +size 340 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1bf776268e9ee56aef8c2d8a5b8a7f5e386674c4 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ac4a77e14aca10b18ef794257624ec05c45ef647cb468a1b481fe80cd8cfbf +size 264 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..eba0fb76defda58baaaa904529711510a14ab135 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3592a1813d96f7ac3a4f03d4f9e82bd95ecdb2698b210141b837d0e7976e4cc +size 11 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..e96db45deb00d3a7a7bfe0184dd26b9759554caf --- 
/dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c0fef2d265fc51c3413cba28cc50fd008e4b138687d6e877d6666655d9ae7d6 +size 11 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..f24677ec5d944b1cc40f374db1ff8a3b932ced6e --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:328a2912aed8be6948896a7e50b9d943b72060bc9e4d3c6a3b57aa54d9fd2908 +size 11 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/output_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/output_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..338b1530965d0e1180afc76c8b0bc22e2658bd04 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/output_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628e2a70ca237069362593d246970a498fc15a46ca719a11b82fa74f93a50993 +size 15 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/output_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/output_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0afd187fc01e877b572a0a455b950fa04bab640 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add/test_data_set_0/output_2.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71c756a6abd3f5f9d3cadf4b4da6711d367f9276e3ab7a671e3b0f2e5bc0b79 +size 15 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0da4b034dc8491b4189a7fa59d2039ddeffbdb10 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58dc697eea329aedd0d5292bb2c550001ab9e5e6b71a809c196c62eda48c1c97 +size 297 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..eba0fb76defda58baaaa904529711510a14ab135 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3592a1813d96f7ac3a4f03d4f9e82bd95ecdb2698b210141b837d0e7976e4cc +size 11 diff --git 
a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..e96db45deb00d3a7a7bfe0184dd26b9759554caf --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c0fef2d265fc51c3413cba28cc50fd008e4b138687d6e877d6666655d9ae7d6 +size 11 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..fe9f66d4da8ff423ec6f72a15188c7361f1493b3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f4a57187b16403ade1f3fa5f682177eedc86250c5cea32bc226f788dcc4d67 +size 11 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/output_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/output_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..91dd84403aab9263c81580fbf9bc8f634fb8481b --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/output_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64852bd25350c683afc88bdd16390a364ff1d74274fc497604a842d553b90aab +size 15 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/output_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/output_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..6c28614043c0df2b56e9d8c1d8d3c419812debb6 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_gradient_of_add_and_mul/test_data_set_0/output_2.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7c99be696eb028cfb6b893020721c179e5c071788403856cfa80f30cf3d4f9 +size 15 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d3952f49eb6123aaae789e666a8880d23e108e51 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e71145a28844efac2df0598f0b96a86cc0a7cacf5901b36d657c09d72075ce19 +size 371 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/input_0.pb new file mode 100644 index 
0000000000000000000000000000000000000000..7cc8de9e5ba4d3b9b6fc8858da202a83fa6db5d7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c11d889aedb20c2047b85397d60ac5ce6f94dfee4ae23a3a0d3a26f4b64529a +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..e3cbb6a4570afdbf3bdc1e5477e74b56b8ebb40d --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c5942492648da7a01f25995e4d53ce652fecf8f603a163e0a6aef8b0faf304 +size 61 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/input_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..b960c25bedf2e62e0677ae1831156a6299ecf350 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/input_2.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2373eca6563c96e303e79c81e1679fc129a5b2f163acaab01afe66c9ad19da2 +size 158 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..388e37c90f7075111a76315ff06bccec9caeb15a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model1/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee606df9b33b12e901190cd4beaa32c4bf77e1bd8bac2903c7f237f8f01a5996 +size 63 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3e410ec763a201061ce1f4f4469355753c65cc29 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:299cd904657eb43bff6e45a8d8a3a38b1afc60a6c75d2fe8ae7c2cb1d3cfa636 +size 322 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..7cc8de9e5ba4d3b9b6fc8858da202a83fa6db5d7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c11d889aedb20c2047b85397d60ac5ce6f94dfee4ae23a3a0d3a26f4b64529a +size 109 diff --git 
a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..ddec2d5685dfb557875128e4b1c4dab132ae5406 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418b83405698264fef64e949bceac58a118d470621408bf15b9f62fdd0c56af0 +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/input_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..593a352db68a5c0ea3da20783ae8ccc39934f7e6 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/input_2.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2213e065346a2e637ebacc5b10d8967442cddc9da901ef8fd026cd79f46aa8 +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..85a56e2652c3572e5408cc5857ab6c5a24af21c1 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model2/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d157e1a990c2c64330c37905f021ccb1d3d0d9a7baff71c7ad619f514521731b +size 111 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c97c61b45b5619d928682e7de6422e1ade76e8b7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27daaf8fb59a6174df24959bc2568e66aeee0926357a69d6bfc3ade1b5735668 +size 437 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..7cc8de9e5ba4d3b9b6fc8858da202a83fa6db5d7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c11d889aedb20c2047b85397d60ac5ce6f94dfee4ae23a3a0d3a26f4b64529a +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/test_data_set_0/input_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..593a352db68a5c0ea3da20783ae8ccc39934f7e6 --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/test_data_set_0/input_2.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2213e065346a2e637ebacc5b10d8967442cddc9da901ef8fd026cd79f46aa8 +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..85a56e2652c3572e5408cc5857ab6c5a24af21c1 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model3/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d157e1a990c2c64330c37905f021ccb1d3d0d9a7baff71c7ad619f514521731b +size 111 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8e612227b5f7798162eabab23431f7d24d8765d1 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d67c440a8bd82e0ce6ab766a70052f45e45b8abbdff92b1ab381faf47421e0 +size 219 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..7cc8de9e5ba4d3b9b6fc8858da202a83fa6db5d7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c11d889aedb20c2047b85397d60ac5ce6f94dfee4ae23a3a0d3a26f4b64529a +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..ddec2d5685dfb557875128e4b1c4dab132ae5406 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418b83405698264fef64e949bceac58a118d470621408bf15b9f62fdd0c56af0 +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/input_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..593a352db68a5c0ea3da20783ae8ccc39934f7e6 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/input_2.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2213e065346a2e637ebacc5b10d8967442cddc9da901ef8fd026cd79f46aa8 +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/output_0.pb 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..e3a4cf7990b14b2664df75acf573f8c65645d152 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model4/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67e4018fcec362c14ef3bce18cc30dc62b42ba9a36457b272db880890c3fe39 +size 304 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8e25a4f4403e93be0f451e9f96932bff16d255b5 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d58a967fe27c96880158631097dc181ec97225ea33035f68dd42d5de016b21e5 +size 251 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..7cc8de9e5ba4d3b9b6fc8858da202a83fa6db5d7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c11d889aedb20c2047b85397d60ac5ce6f94dfee4ae23a3a0d3a26f4b64529a +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..ddec2d5685dfb557875128e4b1c4dab132ae5406 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418b83405698264fef64e949bceac58a118d470621408bf15b9f62fdd0c56af0 +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/input_2.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..593a352db68a5c0ea3da20783ae8ccc39934f7e6 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/input_2.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2213e065346a2e637ebacc5b10d8967442cddc9da901ef8fd026cd79f46aa8 +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf48af7b66576c8e22c2b237a123c5bf1451ccb3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model5/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:3346d3d018be3262bbeaaee0fcd1876411ad06a0884643841db67043138c4b83 +size 306 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model6/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model6/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..70ae381c3ce43b44ee502b7be09cdaa0103979b6 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model6/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:240368624bf2dca1f0bb4390a7bfc939e6e4daecb2c179c5d643a1021f2ff80f +size 156 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model6/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model6/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..7cc8de9e5ba4d3b9b6fc8858da202a83fa6db5d7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model6/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c11d889aedb20c2047b85397d60ac5ce6f94dfee4ae23a3a0d3a26f4b64529a +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model6/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model6/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..97590181c4b64a310998bee3561299e22616083a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model6/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59085bb1db26b263d11dba6a7d9d8c892d0300a7dd7fee5ab51889dcf4126bc2 +size 17 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model7/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model7/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..efc319043d21a64086ef3af02c08154be3afa3c3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model7/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f16d080a63c7e08f916f23e0d6c500a92f20f345632376f5c3a2b05fc2d2b612 +size 209 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model7/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model7/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..26056515251a89d4afb0a3e4c13dc12dfa41a118 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model7/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8836f604417aaf5b3f22f9b695a9a2d827799ae9ab007fa20ae7dda93599b558 +size 206 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model7/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model7/test_data_set_0/output_0.pb new file mode 100644 index 
0000000000000000000000000000000000000000..d31fb4f30886784b3a64b61d24602c2836a5db0e --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model7/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8510e4e3dcd428afb3c7a915842c8f6d45bc1806c7da81e77b8501119f9a6cf9 +size 109 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e7707b79585bced3296c16c2f23f895f659aa567 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b911bee0f943336e3e485d3110c8a09959d17bf793a294e1e623f00264499e00 +size 157 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..9af372f7a2a74a4914574109df73668cbe714c0c --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32772570c50137ce977517422210db8b0f225c135563d9b6ce6c8552794c2ccf +size 9 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/test_data_set_0/input_1.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..6bbcc2bb5739af00da55f04aaf26ffe01ac17add --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/test_data_set_0/input_1.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec426005ebfc242ed5d11c3f83062fea9fff5a7ea7b47a22b0f80e5e62e5ebcd +size 38 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..c9df237917a188a7c1593cd3904d0ec23514aa0b --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sequence_model8/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d0c4b325adcb2005a5f258b9b99b9416d4f8757b5c41f15a0bb139e9f86f532 +size 17 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_shrink/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_shrink/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1e8bc6679384a2df59d3e2bc6ef148916f830fb0 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_shrink/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c36d1e7550cbb85611ca0fe64c54de5f6f1f0c8bdfe44bb60e47e65f49d51b30 +size 115 diff --git 
a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_shrink/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_shrink/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..8c67b073d174cf4ef85ba3086ca857664f0bc79a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_shrink/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b429957279155bb62da373182d2ddc6f446616540ac77efdf9f25d10fd4f62 +size 29 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sign_model/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sign_model/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..521daff72f69508a53313904c255da98d22ad67f --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sign_model/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398a0fc7a1ede855dd7e9b335339df387707637e2bb1c53cdef82494994657f0 +size 37 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sign_model/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sign_model/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..939daf2ad891e6f1ab9713e367159a0303e94073 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_sign_model/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e2b0525408f6d1f2cb310d3f2ab8deff83827690873a911709c84e2a40ed26 +size 37 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_single_relu_model/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_single_relu_model/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..67182b4193ae137b3e0b453a3eac058f25b74863 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_single_relu_model/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f35b768e076a0cdda9c7dcf3a0f3ecbb849396b2f715fd442c7705c7d1fb473b +size 98 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_single_relu_model/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_single_relu_model/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..022f78462ca025fd7d58c8318bada6d20eaaef19 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_single_relu_model/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf73c8c03bf97a56ec4f29558c4226ea4cea6400f0ca7ef05c538a6b39063c3a +size 19 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_single_relu_model/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_single_relu_model/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..53435adb47df7ab764abe1ffcf16b72319dd3e24 --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_single_relu_model/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f28d5710fb67ae20bbb33ff19076483edc6e305083b65c4e2774a1993f95c0 +size 19 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_lower/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_lower/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0ccce556c294646c9dab3277675894268adfa1a0 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_lower/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:604278554856080e7c393b85eeb7390986b34b0c83be6fe174d6614894523ac1 +size 185 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_lower/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_lower/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..af9e40c3e33302ee98103cc12c663c3f4f3b0e85 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_lower/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac5cb8ad56d2b68d7675fcbafe590a34273cb3511a6dbcbe4d935f6f4174abf +size 45 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_lower/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_lower/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..c6fde3a67d4a1e598645d767d3c4d061e0e94375 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_lower/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe986459ec6f48f62031d8ff65324dda1d46b03f65ca8cef619f67e947ab696 +size 37 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_nochangecase/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_nochangecase/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a42c308e097542d2049032c2870ece974994f28a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_nochangecase/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:721af59c24e1a7a638579db30b65ab4d6509d0664920928238941307aaabba20 +size 153 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_nochangecase/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_nochangecase/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..af9e40c3e33302ee98103cc12c663c3f4f3b0e85 --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_nochangecase/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac5cb8ad56d2b68d7675fcbafe590a34273cb3511a6dbcbe4d935f6f4174abf +size 45 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_nochangecase/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_nochangecase/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..c6fde3a67d4a1e598645d767d3c4d061e0e94375 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_nochangecase/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe986459ec6f48f62031d8ff65324dda1d46b03f65ca8cef619f67e947ab696 +size 37 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_upper/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_upper/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d8716587f8960deb1ddeb6c6c60a4d70f4d1f4bc --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_upper/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99390c7502fd90ac8583ec17ad119c1998e9fc9904c0b641f96bf86d3b5e5766 +size 185 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_upper/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_upper/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..af9e40c3e33302ee98103cc12c663c3f4f3b0e85 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_upper/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac5cb8ad56d2b68d7675fcbafe590a34273cb3511a6dbcbe4d935f6f4174abf +size 45 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_upper/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_upper/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..1f43cbe8143bcfd309d8c35f25484046a8d620f8 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_casesensintive_upper/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed993e21e09aa8709fb39675cea1e9e4444a30006ef1e31155560d4cc2dd15ce +size 37 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_empty_output/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_empty_output/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..444a6aa57658a9b114b2c3c873a9511595d359bb --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_empty_output/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeb4233d261f80ae3d8bd35748fd7065b663b0a29a1d072ec9a65795fc5b9490 +size 185 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_empty_output/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_empty_output/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..f01070f8b9cc90e8280bb6d82d55022577a9c5d5 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_empty_output/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6a24ea9cee99e598996eb561c13680ee3d1593f81e13b92fb55089aa1fa284 +size 23 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_empty_output/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_empty_output/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..b4c577a8755d9daebf3e6a20c5c53f8218fc98d7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_empty_output/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef7b8d66a93235d48ee45287fb05ecba46d93b4afdb78fcc55f716892cfcd0d +size 9 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_insensintive_upper_twodim/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_insensintive_upper_twodim/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4bec97c9ca7539d64015e24218bf017e44bec2c3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_insensintive_upper_twodim/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00fc455335e727394d5ea799fd3981115398aa85a11763b261992404eb2703b +size 167 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_insensintive_upper_twodim/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_insensintive_upper_twodim/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..2fc15b6db50d99cd2a430fc339c219572ea348e8 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_insensintive_upper_twodim/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7f72b47a746d25e8f13765fd26d02d5678fbf676c2ae51449e4811c3e3e7f1f +size 65 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_insensintive_upper_twodim/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_insensintive_upper_twodim/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..da2f0599714b06c12c594a1b0d6d9d0abb0c60bd --- /dev/null +++ 
b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_monday_insensintive_upper_twodim/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad3fe4796c271e991b7e46bcd02bb30f8d7390c2a6c405e64271899ad496fb3 +size 49 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_nostopwords_nochangecase/model.onnx b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_nostopwords_nochangecase/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3d09dbc982cd44e7a9fdf032d665f9849784c512 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_nostopwords_nochangecase/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d59a71ed61f4d50d3d83bf1f382b2aed11611ab5353749f9e53c65c3486846c +size 128 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_nostopwords_nochangecase/test_data_set_0/input_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_nostopwords_nochangecase/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..03f4e1303622f8cd1734bab2bdcf608d0b8738f3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_nostopwords_nochangecase/test_data_set_0/input_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b811eae0cf9a95e015b3f667c90b72f98cfce726bbc0ae83a7b4635e4d1b519c +size 24 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_nostopwords_nochangecase/test_data_set_0/output_0.pb b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_nostopwords_nochangecase/test_data_set_0/output_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a1714d914d33a0235851f03330a1e73642d4c0c --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/backend/test/data/simple/test_strnorm_model_nostopwords_nochangecase/test_data_set_0/output_0.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3618c65a33d5c66911d1fdf98217f1e98426dc8b3612e7bd2d6082f8d690a906 +size 24 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/test/__pycache__/reference_evaluator_test.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/onnx/test/__pycache__/reference_evaluator_test.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cf4b07de1f3b63cc90ab6fe51d82c5e2742ab9a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/test/__pycache__/reference_evaluator_test.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe581d428b7701ca961a4749c4368fc24563d808e501a530a9ded722d2c3f5d9 +size 125010 diff --git a/pythonProject/.venv/Lib/site-packages/onnx/test/__pycache__/shape_inference_test.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/onnx/test/__pycache__/shape_inference_test.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f7a6d0ad8a9efbf9ce230dc8d4d2c9ec6b037b8 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnx/test/__pycache__/shape_inference_test.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16e2a424a20c7c3d447c96a50916ad3d90954d9a288b81dc8a9659b0430e4b68 +size 
207385 diff --git a/pythonProject/.venv/Lib/site-packages/onnxscript/function_libs/torch_lib/ops/__pycache__/core.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/onnxscript/function_libs/torch_lib/ops/__pycache__/core.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e825e8e5cc76a7ff9e5be8d07a38bb948c99108d --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnxscript/function_libs/torch_lib/ops/__pycache__/core.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521ff3cf17932a6da91957d97f44c85edc595968e1468560da39831a92f3fef0 +size 235402 diff --git a/pythonProject/.venv/Lib/site-packages/onnxscript/onnx_opset/_impl/__pycache__/opset13.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/onnxscript/onnx_opset/_impl/__pycache__/opset13.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25d43c14b7ed499d0e7c5439595473c7a00de735 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnxscript/onnx_opset/_impl/__pycache__/opset13.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7823776916087ff442eefd61666209bb8759d3369ebad9c78513b5d4683ffde6 +size 126004 diff --git a/pythonProject/.venv/Lib/site-packages/onnxscript/onnx_opset/_impl/__pycache__/opset22.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/onnxscript/onnx_opset/_impl/__pycache__/opset22.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f46d25bcf9a0f2f42ec0f7502789520807ed94c9 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/onnxscript/onnx_opset/_impl/__pycache__/opset22.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3cb2481963f96a5fcf7870a8a6df043b4f0bf3c6be67e34228c19b32fe47307 +size 104120 diff --git a/pythonProject/.venv/Lib/site-packages/pip/_vendor/chardet/__pycache__/johabfreq.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/pip/_vendor/chardet/__pycache__/johabfreq.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d50604d7cdc0e2d74b2678bd8b0eb7f7f7ccde8 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/pip/_vendor/chardet/__pycache__/johabfreq.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5236e645eeeb8bc7137609be2a3af8d5fb2fc50563ff8ef939bd599b0e2197b +size 138743 diff --git a/pythonProject/.venv/Lib/site-packages/pip/_vendor/distlib/w64-arm.exe b/pythonProject/.venv/Lib/site-packages/pip/_vendor/distlib/w64-arm.exe new file mode 100644 index 0000000000000000000000000000000000000000..bc02472528a32bd70ebfa2b0eca26e34a83fa264 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/pip/_vendor/distlib/w64-arm.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5dc9884a8f458371550e09bd396e5418bf375820a31b9899f6499bf391c7b2e +size 168448 diff --git a/pythonProject/.venv/Lib/site-packages/pip/_vendor/distlib/w64.exe b/pythonProject/.venv/Lib/site-packages/pip/_vendor/distlib/w64.exe new file mode 100644 index 0000000000000000000000000000000000000000..daebd1c30353b2d9b016069da83ad5b4c8ee86bd --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/pip/_vendor/distlib/w64.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a319ffaba23a017d7b1e18ba726ba6c54c53d6446db55f92af53c279894f8ad +size 101888 diff --git a/pythonProject/.venv/Lib/site-packages/pip/_vendor/pyparsing/__pycache__/core.cpython-310.pyc 
b/pythonProject/.venv/Lib/site-packages/pip/_vendor/pyparsing/__pycache__/core.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d01b1a7dde708c8ea33b31872c6c75aedc47570 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/pip/_vendor/pyparsing/__pycache__/core.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c727b191ec63af5807be5e300b4a86c1ddf83335a0ce44aaabfa0a1cec89cbf +size 176441 diff --git a/pythonProject/.venv/Lib/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d48f4a5192f1906cf940896226f8a6ff83fb0f57 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb5077624ec22dc73367ff65a67acd04e578a3868513b58011b71492348a7d48 +size 360039 diff --git a/pythonProject/.venv/Lib/site-packages/regex/__pycache__/_regex_core.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/regex/__pycache__/_regex_core.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef4c779662eceb5acea7811a9b62ca2aedc44aa8 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/regex/__pycache__/_regex_core.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:369ed941e890186299563803cc026412f69d41c9458272f61e23f349a06432e4 +size 111077 diff --git a/pythonProject/.venv/Lib/site-packages/regex/__pycache__/test_regex.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/regex/__pycache__/test_regex.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5e8bc1fc5eb753b053e84ff3c552e9203d6a1e0 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/regex/__pycache__/test_regex.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d12f6b58c4303b62e6add026f5ab16716a40885b13ab254d9b5b803ca205bc +size 144984 diff --git a/pythonProject/.venv/Lib/site-packages/regex/_regex.cp310-win_amd64.pyd b/pythonProject/.venv/Lib/site-packages/regex/_regex.cp310-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..aa7a60c1971f31ac5f853be320334061c4b0a61e --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/regex/_regex.cp310-win_amd64.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66f5a9981b278361e9b2d0e842d9875accac182079e748bcb5a37accf27fbea7 +size 723456 diff --git a/pythonProject/.venv/Lib/site-packages/safetensors/_safetensors_rust.pyd b/pythonProject/.venv/Lib/site-packages/safetensors/_safetensors_rust.pyd new file mode 100644 index 0000000000000000000000000000000000000000..bcec2b917bb60754bf43959eb9bb3a29facc7968 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/safetensors/_safetensors_rust.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3996e3643927a18933515d7ae0e028c177ae3816f75eac6d17c3a4d924facc7 +size 704000 diff --git a/pythonProject/.venv/Lib/site-packages/setuptools/cli-arm64.exe b/pythonProject/.venv/Lib/site-packages/setuptools/cli-arm64.exe new file mode 100644 index 0000000000000000000000000000000000000000..26794aecf5a30b3778082d327f4ee461ce50954e --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/setuptools/cli-arm64.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:a3d6a6c68c2e759f7c36f35687f6b60d163c2e1a0846a4c07a4c4006a96d88c7 +size 137216 diff --git a/pythonProject/.venv/Lib/site-packages/setuptools/gui-arm64.exe b/pythonProject/.venv/Lib/site-packages/setuptools/gui-arm64.exe new file mode 100644 index 0000000000000000000000000000000000000000..f7ec8af6e00c532d2aa99354e3adb869b447a25d --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/setuptools/gui-arm64.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c416738a0e2fa6ab766ccf1a9b0a80974e733f9615168dd22a069afa7d5b38d +size 137728 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/combinatorics/__pycache__/perm_groups.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/combinatorics/__pycache__/perm_groups.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd216ed737469b27ac610b44586fdd8f8a3d8e62 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/combinatorics/__pycache__/perm_groups.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33061f5474e959c3ce693b2266ccf8d784f72b41dd3085575c93c5ff9d82e795 +size 152910 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/expr.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/expr.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e661104bf25a8ccaeac89400e91d6a7ea883d48e --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/expr.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:065dba7ff30c511fb19ecd64944f25f3cbc143766f5f072397cb1ba905b74b0d +size 114998 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/function.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/function.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f5dd9bc5a8dd4fe064f7cf3e79d38376cf349a60 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/function.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63e2f0d58f0dae0f4a126a563110e4148600b7b9b6dc864d6af16b633800255e +size 101043 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/numbers.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/numbers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea4562ab43aecb538fe44fdf2a6f88942dda72c2 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/core/__pycache__/numbers.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bea50f9669a3ef8f9d5188170b0663f6c5ac2a97d21725c3b48815b0902e4a1b +size 118110 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/core/tests/__pycache__/test_args.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/core/tests/__pycache__/test_args.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7aaf77e4511ae02f5e5b84e205beeb5ea2a38788 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/core/tests/__pycache__/test_args.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de10ccd60663eebe343fd1df50e6ea33cb2c7a9c4765a34000ce594da98f6f6 +size 220805 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/logic/__pycache__/boolalg.cpython-310.pyc 
b/pythonProject/.venv/Lib/site-packages/sympy/logic/__pycache__/boolalg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f33f3b1c619a39febd1529a32a2fb0c209ffcc84 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/logic/__pycache__/boolalg.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1509e72fc5edef3e4abf383ce66d60ffea0314971d2b5ff46fc419d01923d6eb +size 100321 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/matrices/__pycache__/matrixbase.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/matrices/__pycache__/matrixbase.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3ccb67e1faf33e3f7e5bf4cde9861860902deaa --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/matrices/__pycache__/matrixbase.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73d3891ba029e9449dee219144a630c3119a0ac08b49a04f4a1bf55fb5c8e7a9 +size 164798 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/matrices/tests/__pycache__/test_matrices.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/matrices/tests/__pycache__/test_matrices.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5c5538c8acb25d97a227c5d2a1f6c95627a3650 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/matrices/tests/__pycache__/test_matrices.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0576d158d7a0f80d782cf08154e86d895e788c17bee2fe016fca34d1627ca48 +size 144481 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/matrices/tests/__pycache__/test_matrixbase.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/matrices/tests/__pycache__/test_matrixbase.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a55432d3bef49acf64d8a8009406b5d13e052248 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/matrices/tests/__pycache__/test_matrixbase.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ec11764665b0e1f95f41f205d62ff6f3b028b724d6d4e4d21fa3aca667a1f5 +size 153939 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/parsing/latex/_antlr/__pycache__/latexparser.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/parsing/latex/_antlr/__pycache__/latexparser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff6b190da7b5e5f181851cdea46cf2a4d08ae8c6 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/parsing/latex/_antlr/__pycache__/latexparser.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c33aa363a710c68b9232e5890af7f641e972672853760c60a3904e1fd4649aee +size 109820 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/physics/continuum_mechanics/__pycache__/beam.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/physics/continuum_mechanics/__pycache__/beam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a6639ede7b73950747486ce89ce7641f172af14 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/physics/continuum_mechanics/__pycache__/beam.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a19d4d97daa23484bc65f1acaae4a56645a9cb894e5480bf6d3cef34e4cbc681 +size 122357 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/physics/control/__pycache__/lti.cpython-310.pyc 
b/pythonProject/.venv/Lib/site-packages/sympy/physics/control/__pycache__/lti.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4c373f835cd0b224260c5f81e4d89fe70856bb02 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/physics/control/__pycache__/lti.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bada045cec288538112808847b7a244528c11003b57b538732c812f09265cdff +size 154413 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/physics/quantum/tests/__pycache__/test_spin.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/physics/quantum/tests/__pycache__/test_spin.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b3fd2cc922fc805320377086372ca84374df224 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/physics/quantum/tests/__pycache__/test_spin.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2acda69839d889f27eea8dc6337e70aebc798a01a4c8d0bc56433ea40d9ee9 +size 199583 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/polys/__pycache__/polyquinticconst.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/polys/__pycache__/polyquinticconst.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..098ae22801e42d6adfa17ccbf29dcfc8a62e8949 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/polys/__pycache__/polyquinticconst.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e47df5af7fcc8abb9d5fb965ea5c3c3e61edb37a7ff7c2d6f4985210420c2897 +size 132098 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/polys/__pycache__/polytools.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/polys/__pycache__/polytools.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f762675be7c07f91c86ecf0e131d999e7bc5972a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/polys/__pycache__/polytools.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef56fe974a7346b5840e752ece59be6aa46fa208f31e2a41ead095ddcfe34b6c +size 186797 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/polys/benchmarks/__pycache__/bench_solvers.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/polys/benchmarks/__pycache__/bench_solvers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c936b9200650fb0e35cbede3b3980fd0491594db --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/polys/benchmarks/__pycache__/bench_solvers.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5a6656256d1bc69fbd379c40b969b5c876e746fda47519bd990daf89d59b70 +size 334853 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/polys/tests/__pycache__/test_polytools.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/polys/tests/__pycache__/test_polytools.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4779a098485eed31f69baa26e89f4ca9568f673d --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/polys/tests/__pycache__/test_polytools.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62fe41477955eb530f2a452a4ca06d0a071881da5c3f2300ab611d76a30fc5d5 +size 141257 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/printing/__pycache__/latex.cpython-310.pyc 
b/pythonProject/.venv/Lib/site-packages/sympy/printing/__pycache__/latex.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e8ba38677b27e8598b4cb4e80b57fde13e61600 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/printing/__pycache__/latex.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff3179543c775208fb035bd7fe2a41feb3b9e4ffb480d97d49044a008aab2cc +size 119130 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/printing/pretty/tests/__pycache__/test_pretty.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/printing/pretty/tests/__pycache__/test_pretty.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ec5e70ce3960d0f1a9a4a76ab9448c2aa0ae4a2 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/printing/pretty/tests/__pycache__/test_pretty.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a58e50623fcabc10f10f32fa756078163878cd579b9958db6361543ce068cba +size 154768 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/printing/tests/__pycache__/test_latex.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/printing/tests/__pycache__/test_latex.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c224c23e9489143b8902e24d3fa37a6857556a3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/printing/tests/__pycache__/test_latex.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa8f459d1f7678cbafb6fcbef59ebe57e76cf3e3d6c017329fcb7fbd75b844e +size 127450 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/solvers/__pycache__/solvers.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/solvers/__pycache__/solvers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d00217def18024e2546760eb6196d63453c9c15 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/solvers/__pycache__/solvers.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c53d8c901c5dacacceb5187cbfc710f77b540578525fb32f18be370d8ef0751c +size 100352 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/solvers/__pycache__/solveset.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/solvers/__pycache__/solveset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09bcd715ac3f776ddd4e5cab11b90f6e6747c323 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/solvers/__pycache__/solveset.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c25deb9bd4df758f6f2aa04a1d6b747a112ef0b84943043087bea3838b34f7b +size 111979 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/solvers/ode/__pycache__/ode.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/solvers/ode/__pycache__/ode.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..11b41473086fa53ad5fce25273bda8806910f670 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/solvers/ode/__pycache__/ode.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22fab89094e2b1be979146cd2cfb58db0298b0ac426adedb7b240f91dfc81052 +size 121444 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/solvers/ode/__pycache__/single.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/solvers/ode/__pycache__/single.cpython-310.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..703e924e95485632c30c3a871eb978e38d23fad0 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/solvers/ode/__pycache__/single.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32be89cc44a37bb64c6d8da92b9191b00f029583c34ebd0c1d5b4002d2f5c9e3 +size 105007 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/solvers/tests/__pycache__/test_solveset.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/solvers/tests/__pycache__/test_solveset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0db7cb954694ca8b5476ba0490a4a9868e5efdf --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/solvers/tests/__pycache__/test_solveset.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a63940268cacbb48b8b06ce051fe472eec05a645b568f48d2c411040e3b83037 +size 137711 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/tensor/__pycache__/tensor.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/tensor/__pycache__/tensor.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..afd7a9155285e249e3bcb4f51f2a9338c4c138ee --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/tensor/__pycache__/tensor.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98301c07a7b284a4747405ff1047a381c2e9b89268954ef27dd425d22b2c17e8 +size 152967 diff --git a/pythonProject/.venv/Lib/site-packages/sympy/utilities/tests/__pycache__/test_wester.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/sympy/utilities/tests/__pycache__/test_wester.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8aa9350a5313649d377721a7e1c5c6af7d5b05d --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/sympy/utilities/tests/__pycache__/test_wester.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3898238b6198b35e6236ee98c6e0666c0f52c186cff4fe923dfd8861d323b922 +size 113420 diff --git a/pythonProject/.venv/Lib/site-packages/tokenizers/tokenizers.pyd b/pythonProject/.venv/Lib/site-packages/tokenizers/tokenizers.pyd new file mode 100644 index 0000000000000000000000000000000000000000..3f2a77daf3198aadf316f0e22fe6ad9e319b0d4c --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/tokenizers/tokenizers.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e34f54071755a49a90345780b9170ddc3996b29a4c39d3a1fdc4867327f0ef0 +size 7669248 diff --git a/pythonProject/.venv/Lib/site-packages/torch/bin/asmjit.dll b/pythonProject/.venv/Lib/site-packages/torch/bin/asmjit.dll new file mode 100644 index 0000000000000000000000000000000000000000..e30ac49f0b4db7821900621731b7eb78b3b52ca3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/bin/asmjit.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f9b845cf8873ea6ddbd9141366d2c8533094ca26bed8f93896c36e2d6b94070 +size 358912 diff --git a/pythonProject/.venv/Lib/site-packages/torch/bin/fbgemm.dll b/pythonProject/.venv/Lib/site-packages/torch/bin/fbgemm.dll new file mode 100644 index 0000000000000000000000000000000000000000..2472818cdb3faa62b286a23a1b6f3b4916ebc0d0 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/bin/fbgemm.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771aa8fcf61e8f36c747ae2e0c23d88aafc335cdaffac93e3b84cd144408c61c +size 4961280 diff --git 
a/pythonProject/.venv/Lib/site-packages/torch/bin/protoc.exe b/pythonProject/.venv/Lib/site-packages/torch/bin/protoc.exe new file mode 100644 index 0000000000000000000000000000000000000000..3d88303093e428be9ceca37a52507562ff0142ed --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/bin/protoc.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5b3e8f5d11c14430bfe13659e8aac3bf514db8d3e733243ba27535d05dadcf +size 2812416 diff --git a/pythonProject/.venv/Lib/site-packages/torch/distributed/__pycache__/distributed_c10d.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/torch/distributed/__pycache__/distributed_c10d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3fc9b580e41d277bdbff5cf3e05a791d11364c7 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/distributed/__pycache__/distributed_c10d.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb192d0491d0a7f8abf5de964eb9dc1893633885397a75a9ecc8e48c1ee0c8cf +size 142107 diff --git a/pythonProject/.venv/Lib/site-packages/torch/fx/experimental/__pycache__/symbolic_shapes.cpython-310.pyc b/pythonProject/.venv/Lib/site-packages/torch/fx/experimental/__pycache__/symbolic_shapes.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6a2404f3c9b08093d7debd7e08acf2ab7146d6d --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/fx/experimental/__pycache__/symbolic_shapes.cpython-310.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42af6871792553f794c0cba8008c2f5bd5f7a05d6bfe68728d4beb26341399dc +size 139623 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/asmjit.dll b/pythonProject/.venv/Lib/site-packages/torch/lib/asmjit.dll new file mode 100644 index 0000000000000000000000000000000000000000..e30ac49f0b4db7821900621731b7eb78b3b52ca3 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/asmjit.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f9b845cf8873ea6ddbd9141366d2c8533094ca26bed8f93896c36e2d6b94070 +size 358912 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/asmjit.lib b/pythonProject/.venv/Lib/site-packages/torch/lib/asmjit.lib new file mode 100644 index 0000000000000000000000000000000000000000..38e4db8208e951c027a3c9e6e00479e55068a6ef --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/asmjit.lib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26d7de0bdcdbe0469745a72640b9991fc528c56af7297faa3ec959361e14cf1 +size 125788 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/c10.dll b/pythonProject/.venv/Lib/site-packages/torch/lib/c10.dll new file mode 100644 index 0000000000000000000000000000000000000000..f5280a6a9f3f4956e705db2e96ebc805d9f7a304 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/c10.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2fe1a1846f3daf5a530e2f78faefb104a30fa0d2af264ac68823e6e6f0e1bc3 +size 828416 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/c10.lib b/pythonProject/.venv/Lib/site-packages/torch/lib/c10.lib new file mode 100644 index 0000000000000000000000000000000000000000..53780e67246526f3d659968d839f44565f379079 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/c10.lib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0459e8fb6f8f36828f642dcc78e4d7270321f91daba046946a1e9cee102b96b6 +size 733646 diff --git 
a/pythonProject/.venv/Lib/site-packages/torch/lib/c10_cuda.dll b/pythonProject/.venv/Lib/site-packages/torch/lib/c10_cuda.dll new file mode 100644 index 0000000000000000000000000000000000000000..37a4a5abcb51e7045e78e3d5d7730bf290b16b3e --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/c10_cuda.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:807415e090cc50efc8b6f295a98e4566cfc2e2232a0dc874b7a4e926d89ec03d +size 357376 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/cpuinfo.lib b/pythonProject/.venv/Lib/site-packages/torch/lib/cpuinfo.lib new file mode 100644 index 0000000000000000000000000000000000000000..1364f24862602e0b2afd1daa3a2cc5c148b80db9 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/cpuinfo.lib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47f6ed5d514ca7c9fff35746f03feffe698e3b57f29dbf2fc5ff6b6132f417c +size 116450 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/cudart64_110.dll b/pythonProject/.venv/Lib/site-packages/torch/lib/cudart64_110.dll new file mode 100644 index 0000000000000000000000000000000000000000..20ce96dde8350095485f3300821c8b225e1eb68a --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/cudart64_110.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5c2fb526c4ee4bb218ceb3fa5e8bfde89ce474f38711fdcce802549bf9fc6f +size 526848 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn64_9.dll b/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn64_9.dll new file mode 100644 index 0000000000000000000000000000000000000000..76a5caffb4fdc7854aa051b3c8eb0268da7dfd5f --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn64_9.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f19bd0a78a808f2728f2f56b48f01a0fde3266cf554ede58dc628a8f65ee9d96 +size 438312 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_cnn64_9.dll b/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_cnn64_9.dll new file mode 100644 index 0000000000000000000000000000000000000000..0204025f3e6708b0399becb7dc1710792ea0dcbd --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_cnn64_9.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:806e3ed99b701e580a5f9bb9d1f38964574339786b307020d1b7634810a720d7 +size 3999288 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_engines_runtime_compiled64_9.dll b/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_engines_runtime_compiled64_9.dll new file mode 100644 index 0000000000000000000000000000000000000000..018248aac11505713317b3e3c132e6d38b309500 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_engines_runtime_compiled64_9.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c73bf8f654cae2aa0105bd9b3988b5c578e5bd10cff98d64e92817ad52da569 +size 6787640 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_graph64_9.dll b/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_graph64_9.dll new file mode 100644 index 0000000000000000000000000000000000000000..1ed38d125c46a34d6693abc20a82bcb85ecb4983 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/cudnn_graph64_9.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9540215dc8dd773fb8d8ce04de7145869da3f6042682efbb7b98cd0df8f969ee +size 2146872 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/cufftw64_10.dll 
b/pythonProject/.venv/Lib/site-packages/torch/lib/cufftw64_10.dll new file mode 100644 index 0000000000000000000000000000000000000000..4f319f83381d81dfc5058a0245b3c268d9bfbc55 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/cufftw64_10.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab40fb62e96b7b9567f4f3255ce04614fa279227ee46237a2ea52986038433c +size 290816 diff --git a/pythonProject/.venv/Lib/site-packages/torch/lib/cupti64_2022.3.0.dll b/pythonProject/.venv/Lib/site-packages/torch/lib/cupti64_2022.3.0.dll new file mode 100644 index 0000000000000000000000000000000000000000..1f11d2d6b032abbcf58641f36e3ceb7a92cff918 --- /dev/null +++ b/pythonProject/.venv/Lib/site-packages/torch/lib/cupti64_2022.3.0.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42628524273bb1e28f9e66cafa70d85be29de5ae6d314441d4c09c427762ab5a +size 4291584 diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/__init__.py b/pythonProject/diffusers-main/tests/pipelines/pag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_kolors.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_kolors.py new file mode 100644 index 0000000000000000000000000000000000000000..1bbb4e79e4bcee3d6aac7ad9aec7fe363e37e460 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_kolors.py @@ -0,0 +1,259 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
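+# Illustrative end-user usage of the pipeline exercised below (a hedged sketch:
+# the hub checkpoint id is an assumption for illustration only; the fast tests
+# in this file build tiny dummy components instead of downloading weights):
+#
+#     from diffusers import KolorsPAGPipeline
+#     pipe = KolorsPAGPipeline.from_pretrained(
+#         "Kwai-Kolors/Kolors-diffusers",      # assumed checkpoint id
+#         pag_applied_layers=["mid"],          # self-attention layers to perturb
+#     )
+#     image = pipe("A painting of a squirrel eating a burger", pag_scale=3.0).images[0]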
+ +import inspect +import unittest + +import numpy as np +import torch + +from diffusers import ( + AutoencoderKL, + EulerDiscreteScheduler, + KolorsPAGPipeline, + KolorsPipeline, + UNet2DConditionModel, +) +from diffusers.pipelines.kolors import ChatGLMModel, ChatGLMTokenizer + +from ...testing_utils import enable_full_determinism +from ..pipeline_params import ( + TEXT_TO_IMAGE_BATCH_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, + TEXT_TO_IMAGE_IMAGE_PARAMS, + TEXT_TO_IMAGE_PARAMS, +) +from ..test_pipelines_common import ( + PipelineFromPipeTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class KolorsPAGPipelineFastTests( + PipelineTesterMixin, + PipelineFromPipeTesterMixin, + unittest.TestCase, +): + pipeline_class = KolorsPAGPipeline + params = TEXT_TO_IMAGE_PARAMS.union({"pag_scale", "pag_adaptive_scale"}) + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"add_text_embeds", "add_time_ids"}) + + supports_dduf = False + + # Copied from tests.pipelines.kolors.test_kolors.KolorsPipelineFastTests.get_dummy_components + def get_dummy_components(self, time_cond_proj_dim=None): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(2, 4), + layers_per_block=2, + time_cond_proj_dim=time_cond_proj_dim, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + projection_class_embeddings_input_dim=56, + cross_attention_dim=8, + norm_num_groups=1, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder = ChatGLMModel.from_pretrained( + "hf-internal-testing/tiny-random-chatglm3-6b", torch_dtype=torch.float32 + ) + tokenizer = ChatGLMTokenizer.from_pretrained("hf-internal-testing/tiny-random-chatglm3-6b") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "image_encoder": None, + "feature_extractor": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "pag_scale": 0.9, + "output_type": "np", + } + return inputs + + def test_pag_disable_enable(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline (expect same output when pag is disabled) + pipe_sd = KolorsPipeline(**components) + pipe_sd = pipe_sd.to(device) + pipe_sd.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) 
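+ # The base KolorsPipeline accepts no `pag_scale`, so it is stripped from the shared dummy inputs before the reference run; the PAG pipeline below is then expected to reproduce this reference output when pag_scale=0.0.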
+ del inputs["pag_scale"] + assert "pag_scale" not in inspect.signature(pipe_sd.__call__).parameters, ( + f"`pag_scale` should not be a call parameter of the base pipeline {pipe_sd.__class__.__name__}." + ) + out = pipe_sd(**inputs).images[0, -3:, -3:, -1] + + # pag disabled with pag_scale=0.0 + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["pag_scale"] = 0.0 + out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + # pag enabled + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + out_pag_enabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 + assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 + + def test_pag_applied_layers(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + # pag_applied_layers = ["mid", "down", "up"] should apply to all self-attention layers + all_self_attn_layers = [k for k in pipe.unet.attn_processors.keys() if "attn1" in k] + original_attn_procs = pipe.unet.attn_processors + pag_layers = ["mid", "down", "up"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_layers) + + all_self_attn_mid_layers = [ + "mid_block.attentions.0.transformer_blocks.0.attn1.processor", + "mid_block.attentions.0.transformer_blocks.1.attn1.processor", + ] + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["mid"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers) + + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["mid_block"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers) + + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["mid_block.attentions.0"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers) + + # pag_applied_layers = ["mid_block.attentions.1"] does not exist in the model + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["mid_block.attentions.1"] + with self.assertRaises(ValueError): + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + + # pag_applied_layers = ["down"] should apply to all self-attention layers in down_blocks + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["down"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert len(pipe.pag_attn_processors) == 4 + + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["down_blocks.0"] + with self.assertRaises(ValueError): + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + + 
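+ # "down_blocks.0" is a plain DownBlock2D in the dummy UNet above (it has no attention), hence the ValueError; "down_blocks.1" is the CrossAttnDownBlock2D, whose 2 attention modules x 2 transformer blocks account for the 4 self-attention processors counted next (and 2 for a single attention module).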
pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["down_blocks.1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert len(pipe.pag_attn_processors) == 4 + + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["down_blocks.1.attentions.1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert len(pipe.pag_attn_processors) == 2 + + def test_pag_inference(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe_pag(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == ( + 1, + 64, + 64, + 3, + ), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}" + expected_slice = np.array( + [0.26030684, 0.43192005, 0.4042826, 0.4189067, 0.5181305, 0.3832534, 0.472135, 0.4145031, 0.43726248] + ) + + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=3e-3) + + def test_encode_prompt_works_in_isolation(self): + return super().test_encode_prompt_works_in_isolation(atol=1e-3, rtol=1e-3) diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_pixart_sigma.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_pixart_sigma.py new file mode 100644 index 0000000000000000000000000000000000000000..c04ebad08fdc4e9be2f3852d95bc9dd45116587b --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_pixart_sigma.py @@ -0,0 +1,349 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
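+# Background on the mechanism under test (schematic; the exact combination is
+# implemented inside the PAG pipeline and its attention processors): the
+# processors installed via `pag_applied_layers` replace the selected
+# self-attention maps with identity attention in an extra forward pass, and
+# the final prediction is pushed away from that perturbed branch, roughly
+#
+#     pred = pred_cond + pag_scale * (pred_cond - pred_perturbed)
+#
+# which is why pag_scale=0.0 must reproduce the base pipeline output below.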
+ +import inspect +import tempfile +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, T5EncoderModel + +import diffusers +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + PixArtSigmaPAGPipeline, + PixArtSigmaPipeline, + PixArtTransformer2DModel, +) +from diffusers.utils import logging + +from ...testing_utils import ( + CaptureLogger, + enable_full_determinism, + torch_device, +) +from ..pipeline_params import ( + TEXT_TO_IMAGE_BATCH_PARAMS, + TEXT_TO_IMAGE_IMAGE_PARAMS, + TEXT_TO_IMAGE_PARAMS, +) +from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference, to_np + + +enable_full_determinism() + + +class PixArtSigmaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = PixArtSigmaPAGPipeline + params = TEXT_TO_IMAGE_PARAMS.union({"pag_scale", "pag_adaptive_scale"}) + params = set(params) + params.remove("cross_attention_kwargs") + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = PipelineTesterMixin.required_optional_params + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = PixArtTransformer2DModel( + sample_size=8, + num_layers=2, + patch_size=2, + attention_head_dim=8, + num_attention_heads=3, + caption_channels=32, + in_channels=4, + cross_attention_dim=24, + out_channels=8, + attention_bias=True, + activation_fn="gelu-approximate", + num_embeds_ada_norm=1000, + norm_type="ada_norm_single", + norm_elementwise_affine=False, + norm_eps=1e-6, + ) + torch.manual_seed(0) + vae = AutoencoderKL() + + scheduler = DDIMScheduler() + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + components = { + "transformer": transformer.eval(), + "vae": vae.eval(), + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 1.0, + "pag_scale": 3.0, + "use_resolution_binning": False, + "output_type": "np", + } + return inputs + + def test_pag_disable_enable(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline (expect same output when pag is disabled) + pipe = PixArtSigmaPipeline(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["pag_scale"] + assert "pag_scale" not in inspect.signature(pipe.__call__).parameters, ( + f"`pag_scale` should not be a call parameter of the base pipeline {pipe.__class__.__name__}." 
+ ) + out = pipe(**inputs).images[0, -3:, -3:, -1] + + # pag disabled with pag_scale=0.0 + components["pag_applied_layers"] = ["blocks.1"] + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["pag_scale"] = 0.0 + out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + # pag enabled + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + out_pag_enabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 + assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 + + def test_pag_applied_layers(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + # "attn1" should apply to all self-attention layers. + all_self_attn_layers = [k for k in pipe.transformer.attn_processors.keys() if "attn1" in k] + pag_layers = ["blocks.0", "blocks.1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_layers) + + def test_pag_inference(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe_pag(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == ( + 1, + 8, + 8, + 3, + ), f"the shape of the output image should be (1, 8, 8, 3) but got {image.shape}" + expected_slice = np.array([0.6499, 0.3250, 0.3572, 0.6780, 0.4453, 0.4582, 0.2770, 0.5168, 0.4594]) + + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + # Because the PAG PixArt Sigma has `pag_applied_layers`. + # Also, we shouldn't be doing `set_default_attn_processor()` after loading + # the pipeline with `pag_applied_layers`. + def test_save_load_local(self, expected_max_difference=1e-4): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0] + + logger = logging.get_logger("diffusers.pipelines.pipeline_utils") + logger.setLevel(diffusers.logging.INFO) + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + + with CaptureLogger(logger) as cap_logger: + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, pag_applied_layers=["blocks.1"]) + + for name in pipe_loaded.components.keys(): + if name not in pipe_loaded._optional_components: + assert name in str(cap_logger) + + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess(max_diff, expected_max_difference) + + # We shouldn't be setting `set_default_attn_processor` here. 
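+    # (presumably because resetting to the default processors would drop the PAG attention
+    # processors installed via `pag_applied_layers`)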
+    def test_attention_slicing_forward_pass(
+        self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3
+    ):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        generator_device = "cpu"
+        inputs = self.get_dummy_inputs(generator_device)
+        output_without_slicing = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=1)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing1 = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=2)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing2 = pipe(**inputs)[0]
+
+        if test_max_difference:
+            max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max()
+            max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max()
+            self.assertLess(
+                max(max_diff1, max_diff2),
+                expected_max_diff,
+                "Attention slicing should not affect the inference results",
+            )
+
+        if test_mean_pixel_difference:
+            assert_mean_pixel_difference(to_np(output_with_slicing1[0]), to_np(output_without_slicing[0]))
+            assert_mean_pixel_difference(to_np(output_with_slicing2[0]), to_np(output_without_slicing[0]))
+
+    # Because we have `pag_applied_layers` we cannot directly apply
+    # `set_default_attn_processor`
+    def test_dict_tuple_outputs_equivalent(self, expected_slice=None, expected_max_difference=1e-4):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        generator_device = "cpu"
+        if expected_slice is None:
+            output = pipe(**self.get_dummy_inputs(generator_device))[0]
+        else:
+            output = expected_slice
+
+        output_tuple = pipe(**self.get_dummy_inputs(generator_device), return_dict=False)[0]
+
+        if expected_slice is None:
+            max_diff = np.abs(to_np(output) - to_np(output_tuple)).max()
+        else:
+            if output_tuple.ndim != 5:
+                max_diff = np.abs(to_np(output) - to_np(output_tuple)[0, -3:, -3:, -1].flatten()).max()
+            else:
+                max_diff = np.abs(to_np(output) - to_np(output_tuple)[0, -3:, -3:, -1, -1].flatten()).max()
+
+        self.assertLess(max_diff, expected_max_difference)
+
+    # Same reason as above
+    def test_inference_batch_single_identical(
+        self,
+        batch_size=2,
+        expected_max_diff=1e-4,
+        additional_params_copy_to_batched_inputs=["num_inference_steps"],
+    ):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        inputs = self.get_dummy_inputs(torch_device)
+        # Reset generator in case it has been used in self.get_dummy_inputs
+        inputs["generator"] = self.get_generator(0)
+
+        logger = logging.get_logger(pipe.__module__)
+        logger.setLevel(level=diffusers.logging.FATAL)
+
+        # batchify inputs
+        batched_inputs = {}
+        batched_inputs.update(inputs)
+
+        for name in self.batch_params:
+            if name not in inputs:
+                continue
+
+            value = inputs[name]
+            if name == "prompt":
+                len_prompt = len(value)
+                batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+                batched_inputs[name][-1] = 100 * "very long"
+
+            else:
+                batched_inputs[name] = batch_size * [value]
+
+        if "generator" in inputs:
+            batched_inputs["generator"] = [self.get_generator(i) for i in range(batch_size)]
+
+        if "batch_size" in inputs:
+            batched_inputs["batch_size"] = batch_size
+
+        for arg in additional_params_copy_to_batched_inputs:
+            batched_inputs[arg] =
inputs[arg] + + output = pipe(**inputs) + output_batch = pipe(**batched_inputs) + + assert output_batch[0].shape[0] == batch_size + + max_diff = np.abs(to_np(output_batch[0][0]) - to_np(output[0][0])).max() + assert max_diff < expected_max_diff + + # Because we're passing `pag_applied_layers` (type of List) in the components as well. + def test_components_function(self): + init_components = self.get_dummy_components() + init_components = {k: v for k, v in init_components.items() if not isinstance(v, (str, int, float, list))} + + pipe = self.pipeline_class(**init_components) + + self.assertTrue(hasattr(pipe, "components")) + self.assertTrue(set(pipe.components.keys()) == set(init_components.keys())) + + @unittest.skip("Test is already covered through encode_prompt isolation.") + def test_save_load_optional_components(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sana.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sana.py new file mode 100644 index 0000000000000000000000000000000000000000..5408595c729dbc21113b651e0d1f80ad402b85d5 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sana.py @@ -0,0 +1,341 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import inspect +import unittest + +import numpy as np +import torch +from transformers import Gemma2Config, Gemma2ForCausalLM, GemmaTokenizer + +from diffusers import ( + AutoencoderDC, + FlowMatchEulerDiscreteScheduler, + SanaPAGPipeline, + SanaPipeline, + SanaTransformer2DModel, +) + +from ...testing_utils import enable_full_determinism, torch_device +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class SanaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SanaPAGPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = SanaTransformer2DModel( + patch_size=1, + in_channels=4, + out_channels=4, + num_layers=2, + num_attention_heads=2, + attention_head_dim=4, + num_cross_attention_heads=2, + cross_attention_head_dim=4, + cross_attention_dim=8, + caption_channels=8, + sample_size=32, + ) + + torch.manual_seed(0) + vae = AutoencoderDC( + in_channels=3, + latent_channels=4, + attention_head_dim=2, + encoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + decoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + encoder_block_out_channels=(8, 8), + decoder_block_out_channels=(8, 8), + encoder_qkv_multiscales=((), (5,)), + decoder_qkv_multiscales=((), (5,)), + encoder_layers_per_block=(1, 1), + decoder_layers_per_block=[1, 1], + downsample_block_type="conv", + upsample_block_type="interpolate", + decoder_norm_types="rms_norm", + decoder_act_fns="silu", + scaling_factor=0.41407, + ) + + torch.manual_seed(0) + scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) + + torch.manual_seed(0) + text_encoder_config = Gemma2Config( + head_dim=16, + hidden_size=32, + initializer_range=0.02, + intermediate_size=64, + max_position_embeddings=8192, + model_type="gemma2", + num_attention_heads=2, + num_hidden_layers=1, + num_key_value_heads=2, + vocab_size=8, + attn_implementation="eager", + ) + text_encoder = Gemma2ForCausalLM(text_encoder_config) + tokenizer = GemmaTokenizer.from_pretrained("hf-internal-testing/dummy-gemma") + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "", + "negative_prompt": "", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "pag_scale": 3.0, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "pt", + "complex_human_instruction": None, + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = 
pipe(**inputs)[0]
+        generated_image = image[0]
+
+        self.assertEqual(generated_image.shape, (3, 32, 32))
+        # The reference image is random noise, so with this tolerance the check
+        # effectively only smoke-tests that inference runs and produces the right shape.
+        expected_image = torch.randn(3, 32, 32)
+        max_diff = np.abs(generated_image - expected_image).max()
+        self.assertLessEqual(max_diff, 1e10)
+
+    def test_callback_inputs(self):
+        sig = inspect.signature(self.pipeline_class.__call__)
+        has_callback_tensor_inputs = "callback_on_step_end_tensor_inputs" in sig.parameters
+        has_callback_step_end = "callback_on_step_end" in sig.parameters
+
+        if not (has_callback_tensor_inputs and has_callback_step_end):
+            return
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        self.assertTrue(
+            hasattr(pipe, "_callback_tensor_inputs"),
+            f" {self.pipeline_class} should have `_callback_tensor_inputs` that defines a list of tensor variables its callback function can use as inputs",
+        )
+
+        def callback_inputs_subset(pipe, i, t, callback_kwargs):
+            # iterate over callback args
+            for tensor_name, tensor_value in callback_kwargs.items():
+                # check that we're only passing in allowed tensor inputs
+                assert tensor_name in pipe._callback_tensor_inputs
+
+            return callback_kwargs
+
+        def callback_inputs_all(pipe, i, t, callback_kwargs):
+            for tensor_name in pipe._callback_tensor_inputs:
+                assert tensor_name in callback_kwargs
+
+            # iterate over callback args
+            for tensor_name, tensor_value in callback_kwargs.items():
+                # check that we're only passing in allowed tensor inputs
+                assert tensor_name in pipe._callback_tensor_inputs
+
+            return callback_kwargs
+
+        inputs = self.get_dummy_inputs(torch_device)
+
+        # Test passing in a subset
+        inputs["callback_on_step_end"] = callback_inputs_subset
+        inputs["callback_on_step_end_tensor_inputs"] = ["latents"]
+        output = pipe(**inputs)[0]
+
+        # Test passing in everything
+        inputs["callback_on_step_end"] = callback_inputs_all
+        inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs
+        output = pipe(**inputs)[0]
+
+        def callback_inputs_change_tensor(pipe, i, t, callback_kwargs):
+            is_last = i == (pipe.num_timesteps - 1)
+            if is_last:
+                callback_kwargs["latents"] = torch.zeros_like(callback_kwargs["latents"])
+            return callback_kwargs
+
+        inputs["callback_on_step_end"] = callback_inputs_change_tensor
+        inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs
+        output = pipe(**inputs)[0]
+        assert output.abs().sum() < 1e10
+
+    def test_attention_slicing_forward_pass(
+        self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3
+    ):
+        if not self.test_attention_slicing:
+            return
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        generator_device = "cpu"
+        inputs = self.get_dummy_inputs(generator_device)
+        output_without_slicing = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=1)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing1 = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=2)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing2 = pipe(**inputs)[0]
+
+        if test_max_difference:
+            max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max()
+            max_diff2 = np.abs(to_np(output_with_slicing2) -
to_np(output_without_slicing)).max() + self.assertLess( + max(max_diff1, max_diff2), + expected_max_diff, + "Attention slicing should not affect the inference results", + ) + + def test_pag_disable_enable(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline (expect same output when pag is disabled) + pipe_sd = SanaPipeline(**components) + pipe_sd = pipe_sd.to(device) + pipe_sd.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["pag_scale"] + assert "pag_scale" not in inspect.signature(pipe_sd.__call__).parameters, ( + f"`pag_scale` should not be a call parameter of the base pipeline {pipe_sd.__class__.__name__}." + ) + out = pipe_sd(**inputs).images[0, -3:, -3:, -1] + + components = self.get_dummy_components() + + # pag disabled with pag_scale=0.0 + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["pag_scale"] = 0.0 + out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 + + def test_pag_applied_layers(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + all_self_attn_layers = [k for k in pipe.transformer.attn_processors.keys() if "attn1" in k] + original_attn_procs = pipe.transformer.attn_processors + pag_layers = ["blocks.0", "blocks.1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_layers) + + # blocks.0 + block_0_self_attn = ["transformer_blocks.0.attn1.processor"] + pipe.transformer.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["blocks.0"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(block_0_self_attn) + + pipe.transformer.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["blocks.0.attn1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(block_0_self_attn) + + pipe.transformer.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["blocks.(0|1)"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert (len(pipe.pag_attn_processors)) == 2 + + pipe.transformer.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["blocks.0", r"blocks\.1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert len(pipe.pag_attn_processors) == 2 + + # TODO(aryan): Create a dummy gemma model with smol vocab size + @unittest.skip( + "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error." + ) + def test_inference_batch_consistent(self): + pass + + @unittest.skip( + "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. 
This test uses a long prompt that causes the error." + ) + def test_inference_batch_single_identical(self): + pass + + def test_float16_inference(self): + # Requires higher tolerance as model seems very sensitive to dtype + super().test_float16_inference(expected_max_diff=0.08) diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd.py new file mode 100644 index 0000000000000000000000000000000000000000..064815d1369341480d6b91fdda417e276af824d3 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd.py @@ -0,0 +1,350 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import inspect +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + AutoPipelineForText2Image, + DDIMScheduler, + StableDiffusionPAGPipeline, + StableDiffusionPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + TEXT_TO_IMAGE_BATCH_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, + TEXT_TO_IMAGE_IMAGE_PARAMS, + TEXT_TO_IMAGE_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineFromPipeTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionPAGPipelineFastTests( + PipelineTesterMixin, + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineFromPipeTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionPAGPipeline + params = TEXT_TO_IMAGE_PARAMS.union({"pag_scale", "pag_adaptive_scale"}) + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"add_text_embeds", "add_time_ids"}) + + def get_dummy_components(self, time_cond_proj_dim=None): + cross_attention_dim = 8 + + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(4, 8), + layers_per_block=2, + sample_size=32, + time_cond_proj_dim=time_cond_proj_dim, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=cross_attention_dim, + norm_num_groups=2, + ) + scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[4, 8], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + norm_num_groups=2, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + 
bos_token_id=0, + eos_token_id=2, + hidden_size=cross_attention_dim, + intermediate_size=16, + layer_norm_eps=1e-05, + num_attention_heads=2, + num_hidden_layers=2, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "pag_scale": 0.9, + "output_type": "np", + } + return inputs + + def test_pag_disable_enable(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline (expect same output when pag is disabled) + pipe_sd = StableDiffusionPipeline(**components) + pipe_sd = pipe_sd.to(device) + pipe_sd.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["pag_scale"] + assert "pag_scale" not in inspect.signature(pipe_sd.__call__).parameters, ( + f"`pag_scale` should not be a call parameter of the base pipeline {pipe_sd.__class__.__name__}." + ) + out = pipe_sd(**inputs).images[0, -3:, -3:, -1] + + # pag disabled with pag_scale=0.0 + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["pag_scale"] = 0.0 + out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + # pag enabled + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + out_pag_enabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 + assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 + + def test_pag_applied_layers(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + # pag_applied_layers = ["mid","up","down"] should apply to all self-attention layers + all_self_attn_layers = [k for k in pipe.unet.attn_processors.keys() if "attn1" in k] + original_attn_procs = pipe.unet.attn_processors + pag_layers = [ + "down", + "mid", + "up", + ] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_layers) + + # pag_applied_layers = ["mid"], or ["mid.block_0"] or ["mid.block_0.attentions_0"] should apply to all self-attention layers in mid_block, i.e. 
+        # mid_block.attentions.0.transformer_blocks.0.attn1.processor
+        all_self_attn_mid_layers = [
+            "mid_block.attentions.0.transformer_blocks.0.attn1.processor",
+        ]
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["mid"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers)
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["mid_block"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers)
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["mid_block.attentions.0"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers)
+
+        # pag_applied_layers = ["mid_block.attentions.1"] does not exist in the model
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["mid_block.attentions.1"]
+        with self.assertRaises(ValueError):
+            pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+
+        # pag_applied_layers = ["down"] should apply to all self-attention layers in down_blocks, i.e.
+        # down_blocks.1.attentions.0.transformer_blocks.0.attn1.processor
+        # down_blocks.1.attentions.1.transformer_blocks.0.attn1.processor
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["down"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert len(pipe.pag_attn_processors) == 2
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["down_blocks.0"]
+        with self.assertRaises(ValueError):
+            pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["down_blocks.1"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert len(pipe.pag_attn_processors) == 2
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["down_blocks.1.attentions.1"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert len(pipe.pag_attn_processors) == 1
+
+    def test_pag_inference(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+
+        pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"])
+        pipe_pag = pipe_pag.to(device)
+        pipe_pag.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = pipe_pag(**inputs).images
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (
+            1,
+            64,
+            64,
+            3,
+        ), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}"
+
+        expected_slice = np.array(
+            [0.22802538, 0.44626093, 0.48905736, 0.29633686, 0.36400637, 0.4724258, 0.4678891, 0.32260418, 0.41611585]
+        )
+        max_diff = np.abs(image_slice.flatten() - expected_slice).max()
+        self.assertLessEqual(max_diff, 1e-3)
+
+    def test_encode_prompt_works_in_isolation(self):
+        extra_required_param_value_dict = {
"device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + + +@slow +@require_torch_accelerator +class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase): + pipeline_class = StableDiffusionPAGPipeline + repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", seed=1, guidance_scale=7.0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "a polar bear sitting in a chair drinking a milkshake", + "negative_prompt": "deformed, ugly, wrong proportion, low res, bad anatomy, worst quality, low quality", + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": guidance_scale, + "pag_scale": 3.0, + "output_type": "np", + } + return inputs + + def test_pag_cfg(self): + pipeline = AutoPipelineForText2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 512, 512, 3) + + expected_slice = np.array( + [0.58251953, 0.5722656, 0.5683594, 0.55029297, 0.52001953, 0.52001953, 0.49951172, 0.45410156, 0.50146484] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) + + def test_pag_uncond(self): + pipeline = AutoPipelineForText2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device, guidance_scale=0.0) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array( + [0.5986328, 0.52441406, 0.3972168, 0.4741211, 0.34985352, 0.22705078, 0.4128418, 0.2866211, 0.31713867] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd3.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd3.py new file mode 100644 index 0000000000000000000000000000000000000000..26e6ca099286b880b38581ebb92723b3d276fbfb --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd3.py @@ -0,0 +1,263 @@ +import inspect +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel + +from diffusers import ( + AutoencoderKL, + FlowMatchEulerDiscreteScheduler, + SD3Transformer2DModel, + StableDiffusion3PAGPipeline, + StableDiffusion3Pipeline, +) + +from ...testing_utils import ( + torch_device, +) +from ..test_pipelines_common import ( + PipelineTesterMixin, + check_qkv_fusion_matches_attn_procs_length, + check_qkv_fusion_processors_exist, +) + + +class StableDiffusion3PAGPipelineFastTests(unittest.TestCase, 
PipelineTesterMixin): + pipeline_class = StableDiffusion3PAGPipeline + params = frozenset( + [ + "prompt", + "height", + "width", + "guidance_scale", + "negative_prompt", + "prompt_embeds", + "negative_prompt_embeds", + ] + ) + batch_params = frozenset(["prompt", "negative_prompt"]) + test_xformers_attention = False + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = SD3Transformer2DModel( + sample_size=32, + patch_size=1, + in_channels=4, + num_layers=2, + attention_head_dim=8, + num_attention_heads=4, + caption_projection_dim=32, + joint_attention_dim=32, + pooled_projection_dim=64, + out_channels=4, + ) + clip_text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + hidden_act="gelu", + projection_dim=32, + ) + + torch.manual_seed(0) + text_encoder = CLIPTextModelWithProjection(clip_text_encoder_config) + + torch.manual_seed(0) + text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) + + text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_3 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + vae = AutoencoderKL( + sample_size=32, + in_channels=3, + out_channels=3, + block_out_channels=(4,), + layers_per_block=1, + latent_channels=4, + norm_num_groups=1, + use_quant_conv=False, + use_post_quant_conv=False, + shift_factor=0.0609, + scaling_factor=1.5035, + ) + + scheduler = FlowMatchEulerDiscreteScheduler() + + return { + "scheduler": scheduler, + "text_encoder": text_encoder, + "text_encoder_2": text_encoder_2, + "text_encoder_3": text_encoder_3, + "tokenizer": tokenizer, + "tokenizer_2": tokenizer_2, + "tokenizer_3": tokenizer_3, + "transformer": transformer, + "vae": vae, + } + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device="cpu").manual_seed(seed) + + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + "pag_scale": 0.0, + } + return inputs + + def test_stable_diffusion_3_different_prompts(self): + pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) + + inputs = self.get_dummy_inputs(torch_device) + output_same_prompt = pipe(**inputs).images[0] + + inputs = self.get_dummy_inputs(torch_device) + inputs["prompt_2"] = "a different prompt" + inputs["prompt_3"] = "another different prompt" + output_different_prompts = pipe(**inputs).images[0] + + max_diff = np.abs(output_same_prompt - output_different_prompts).max() + + # Outputs should be different here + assert max_diff > 1e-2 + + def test_stable_diffusion_3_different_negative_prompts(self): + pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) + + inputs = self.get_dummy_inputs(torch_device) + output_same_prompt = pipe(**inputs).images[0] + + inputs = self.get_dummy_inputs(torch_device) + inputs["negative_prompt_2"] = "deformed" + inputs["negative_prompt_3"] = "blurry" + output_different_prompts = pipe(**inputs).images[0] + + max_diff = np.abs(output_same_prompt - output_different_prompts).max() + 
+ # Outputs should be different here + assert max_diff > 1e-2 + + def test_fused_qkv_projections(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + original_image_slice = image[0, -3:, -3:, -1] + + # TODO (sayakpaul): will refactor this once `fuse_qkv_projections()` has been added + # to the pipeline level. + pipe.transformer.fuse_qkv_projections() + assert check_qkv_fusion_processors_exist(pipe.transformer), ( + "Something wrong with the fused attention processors. Expected all the attention processors to be fused." + ) + assert check_qkv_fusion_matches_attn_procs_length( + pipe.transformer, pipe.transformer.original_attn_processors + ), "Something wrong with the attention processors concerning the fused QKV projections." + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice_fused = image[0, -3:, -3:, -1] + + pipe.transformer.unfuse_qkv_projections() + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice_disabled = image[0, -3:, -3:, -1] + + assert np.allclose(original_image_slice, image_slice_fused, atol=1e-3, rtol=1e-3), ( + "Fusion of QKV projections shouldn't affect the outputs." + ) + assert np.allclose(image_slice_fused, image_slice_disabled, atol=1e-3, rtol=1e-3), ( + "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled." + ) + assert np.allclose(original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2), ( + "Original outputs should match when fused QKV projections are disabled." + ) + + def test_pag_disable_enable(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline (expect same output when pag is disabled) + pipe_sd = StableDiffusion3Pipeline(**components) + pipe_sd = pipe_sd.to(device) + pipe_sd.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["pag_scale"] + assert "pag_scale" not in inspect.signature(pipe_sd.__call__).parameters, ( + f"`pag_scale` should not be a call parameter of the base pipeline {pipe_sd.__class__.__name__}." 
+ ) + out = pipe_sd(**inputs).images[0, -3:, -3:, -1] + + components = self.get_dummy_components() + + # pag disabled with pag_scale=0.0 + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["pag_scale"] = 0.0 + out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 + + def test_pag_applied_layers(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + all_self_attn_layers = [k for k in pipe.transformer.attn_processors.keys() if "attn" in k] + original_attn_procs = pipe.transformer.attn_processors + pag_layers = ["blocks.0", "blocks.1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_layers) + + # blocks.0 + block_0_self_attn = ["transformer_blocks.0.attn.processor"] + pipe.transformer.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["blocks.0"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(block_0_self_attn) + + pipe.transformer.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["blocks.0.attn"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(block_0_self_attn) + + pipe.transformer.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["blocks.(0|1)"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert (len(pipe.pag_attn_processors)) == 2 + + pipe.transformer.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["blocks.0", r"blocks\.1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert len(pipe.pag_attn_processors) == 2 diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd3_img2img.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd3_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..19a36e283de478a04e129e8059cb1d757975a554 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd3_img2img.py @@ -0,0 +1,277 @@ +import gc +import inspect +import random +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel + +from diffusers import ( + AutoencoderKL, + AutoPipelineForImage2Image, + FlowMatchEulerDiscreteScheduler, + SD3Transformer2DModel, + StableDiffusion3Img2ImgPipeline, + StableDiffusion3PAGImg2ImgPipeline, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + floats_tensor, + load_image, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import ( + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusion3PAGImg2ImgPipelineFastTests(unittest.TestCase, 
PipelineTesterMixin):
+    pipeline_class = StableDiffusion3PAGImg2ImgPipeline
+    params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS.union({"pag_scale", "pag_adaptive_scale"}) - {"height", "width"}
+    required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"}
+    batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS
+    image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS
+    image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS
+    callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS
+
+    test_xformers_attention = False
+
+    def get_dummy_components(self):
+        torch.manual_seed(0)
+        transformer = SD3Transformer2DModel(
+            sample_size=32,
+            patch_size=1,
+            in_channels=4,
+            num_layers=2,
+            attention_head_dim=8,
+            num_attention_heads=4,
+            caption_projection_dim=32,
+            joint_attention_dim=32,
+            pooled_projection_dim=64,
+            out_channels=4,
+        )
+        clip_text_encoder_config = CLIPTextConfig(
+            bos_token_id=0,
+            eos_token_id=2,
+            hidden_size=32,
+            intermediate_size=37,
+            layer_norm_eps=1e-05,
+            num_attention_heads=4,
+            num_hidden_layers=5,
+            pad_token_id=1,
+            vocab_size=1000,
+            hidden_act="gelu",
+            projection_dim=32,
+        )
+
+        torch.manual_seed(0)
+        text_encoder = CLIPTextModelWithProjection(clip_text_encoder_config)
+
+        torch.manual_seed(0)
+        text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
+
+        text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
+
+        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+        tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+        tokenizer_3 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
+
+        torch.manual_seed(0)
+        vae = AutoencoderKL(
+            sample_size=32,
+            in_channels=3,
+            out_channels=3,
+            block_out_channels=(4,),
+            layers_per_block=1,
+            latent_channels=4,
+            norm_num_groups=1,
+            use_quant_conv=False,
+            use_post_quant_conv=False,
+            shift_factor=0.0609,
+            scaling_factor=1.5035,
+        )
+
+        scheduler = FlowMatchEulerDiscreteScheduler()
+
+        return {
+            "scheduler": scheduler,
+            "text_encoder": text_encoder,
+            "text_encoder_2": text_encoder_2,
+            "text_encoder_3": text_encoder_3,
+            "tokenizer": tokenizer,
+            "tokenizer_2": tokenizer_2,
+            "tokenizer_3": tokenizer_3,
+            "transformer": transformer,
+            "vae": vae,
+        }
+
+    def get_dummy_inputs(self, device, seed=0):
+        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
+        image = image / 2 + 0.5
+        if str(device).startswith("mps"):
+            generator = torch.manual_seed(seed)
+        else:
+            generator = torch.Generator(device="cpu").manual_seed(seed)
+
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "image": image,
+            "generator": generator,
+            "num_inference_steps": 2,
+            "guidance_scale": 5.0,
+            "output_type": "np",
+            "pag_scale": 0.7,
+        }
+        return inputs
+
+    def test_pag_disable_enable(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+
+        # base pipeline (expect same output when pag is disabled)
+        pipe_sd = StableDiffusion3Img2ImgPipeline(**components)
+        pipe_sd = pipe_sd.to(device)
+        pipe_sd.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        del inputs["pag_scale"]
+        assert "pag_scale" not in inspect.signature(pipe_sd.__call__).parameters, (
+            f"`pag_scale` should not be a call parameter of the base pipeline {pipe_sd.__class__.__name__}."
+ ) + out = pipe_sd(**inputs).images[0, -3:, -3:, -1] + + components = self.get_dummy_components() + + # pag disabled with pag_scale=0.0 + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["pag_scale"] = 0.0 + out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 + + def test_pag_inference(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["blocks.0"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe_pag(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == ( + 1, + 32, + 32, + 3, + ), f"the shape of the output image should be (1, 32, 32, 3) but got {image.shape}" + + expected_slice = np.array( + [0.66063476, 0.44838923, 0.5484299, 0.7242875, 0.5970012, 0.6015729, 0.53080845, 0.52220416, 0.56397927] + ) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + +@slow +@require_torch_accelerator +class StableDiffusion3PAGImg2ImgPipelineIntegrationTests(unittest.TestCase): + pipeline_class = StableDiffusion3PAGImg2ImgPipeline + repo_id = "stabilityai/stable-diffusion-3-medium-diffusers" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs( + self, device, generator_device="cpu", dtype=torch.float32, seed=0, guidance_scale=7.0, pag_scale=0.7 + ): + img_url = ( + "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png" + ) + init_image = load_image(img_url) + + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "an astronaut in a space suit walking through a jungle", + "generator": generator, + "image": init_image, + "num_inference_steps": 12, + "strength": 0.6, + "guidance_scale": guidance_scale, + "pag_scale": pag_scale, + "output_type": "np", + } + return inputs + + def test_pag_cfg(self): + pipeline = AutoPipelineForImage2Image.from_pretrained( + self.repo_id, enable_pag=True, torch_dtype=torch.float16, pag_applied_layers=["blocks.17"] + ) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = pipeline(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 1024, 1024, 3) + expected_slice = np.array( + [ + 0.16772461, + 0.17626953, + 0.18432617, + 0.17822266, + 0.18359375, + 0.17626953, + 0.17407227, + 0.17700195, + 0.17822266, + ] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) + + def test_pag_uncond(self): + pipeline = AutoPipelineForImage2Image.from_pretrained( + self.repo_id, enable_pag=True, torch_dtype=torch.float16, pag_applied_layers=["blocks.(4|17)"] + ) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device, guidance_scale=0.0, pag_scale=1.8) + image = pipeline(**inputs).images + image_slice 
= image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 1024, 1024, 3) + expected_slice = np.array( + [0.1508789, 0.16210938, 0.17138672, 0.16210938, 0.17089844, 0.16137695, 0.16235352, 0.16430664, 0.16455078] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd_img2img.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..0b440d5ec9fc3b2c7458490fa4bfae9e2b1f6056 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd_img2img.py @@ -0,0 +1,290 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import inspect +import random +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + AutoencoderTiny, + AutoPipelineForImage2Image, + EulerDiscreteScheduler, + StableDiffusionImg2ImgPipeline, + StableDiffusionPAGImg2ImgPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + floats_tensor, + load_image, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionPAGImg2ImgPipelineFastTests( + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionPAGImg2ImgPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS.union({"pag_scale", "pag_adaptive_scale"}) - {"height", "width"} + required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"} + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS + + def get_dummy_components(self, time_cond_proj_dim=None): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + time_cond_proj_dim=time_cond_proj_dim, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + 
block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_tiny_autoencoder(self): + return AutoencoderTiny(in_channels=3, out_channels=3, latent_channels=4) + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image / 2 + 0.5 + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "pag_scale": 0.9, + "output_type": "np", + } + return inputs + + def test_pag_disable_enable(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline (expect same output when pag is disabled) + pipe_sd = StableDiffusionImg2ImgPipeline(**components) + pipe_sd = pipe_sd.to(device) + pipe_sd.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["pag_scale"] + assert "pag_scale" not in inspect.signature(pipe_sd.__call__).parameters, ( + f"`pag_scale` should not be a call parameter of the base pipeline {pipe_sd.__class__.__name__}." 
+ ) + out = pipe_sd(**inputs).images[0, -3:, -3:, -1] + + # pag disabled with pag_scale=0.0 + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["pag_scale"] = 0.0 + out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + # pag enabled + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + out_pag_enabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 + assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 + + def test_pag_inference(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe_pag(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == ( + 1, + 32, + 32, + 3, + ), f"the shape of the output image should be (1, 32, 32, 3) but got {image.shape}" + + expected_slice = np.array( + [0.44203848, 0.49598145, 0.42248967, 0.6707724, 0.5683791, 0.43603387, 0.58316565, 0.60077155, 0.5174199] + ) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + + +@slow +@require_torch_accelerator +class StableDiffusionPAGImg2ImgPipelineIntegrationTests(unittest.TestCase): + pipeline_class = StableDiffusionPAGImg2ImgPipeline + repo_id = "Jiali/stable-diffusion-1.5" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/sketch-mountains-input.png" + ) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "image": init_image, + "generator": generator, + "num_inference_steps": 3, + "strength": 0.75, + "guidance_scale": 7.5, + "pag_scale": 3.0, + "output_type": "np", + } + return inputs + + def test_pag_cfg(self): + pipeline = AutoPipelineForImage2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 512, 512, 3) + + expected_slice = np.array( + [0.58251953, 0.5722656, 0.5683594, 0.55029297, 0.52001953, 0.52001953, 0.49951172, 0.45410156, 0.50146484] + ) + assert np.abs(image_slice.flatten() - 
expected_slice).max() < 1e-3, (
+            f"output is different from expected, {image_slice.flatten()}"
+        )
+
+    def test_pag_uncond(self):
+        pipeline = AutoPipelineForImage2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
+        pipeline.enable_model_cpu_offload(device=torch_device)
+        pipeline.set_progress_bar_config(disable=None)
+
+        # `get_inputs` above takes no `guidance_scale` argument, so disable CFG by
+        # overriding the returned dict instead of passing an unsupported keyword
+        inputs = self.get_inputs(torch_device)
+        inputs["guidance_scale"] = 0.0
+        image = pipeline(**inputs).images
+
+        image_slice = image[0, -3:, -3:, -1].flatten()
+        assert image.shape == (1, 512, 512, 3)
+        expected_slice = np.array(
+            [0.5986328, 0.52441406, 0.3972168, 0.4741211, 0.34985352, 0.22705078, 0.4128418, 0.2866211, 0.31713867]
+        )
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, (
+            f"output is different from expected, {image_slice.flatten()}"
+        )
diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd_inpaint.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd_inpaint.py
new file mode 100644
index 0000000000000000000000000000000000000000..709df683705593308a0554362275e8e506fb1fd0
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sd_inpaint.py
@@ -0,0 +1,324 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
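+
+# Usage sketch of the behaviour under test (illustrative only: the checkpoint id,
+# prompt and scales are placeholders taken from the integration tests further down):
+#
+#     from diffusers import AutoPipelineForInpainting
+#
+#     pipe = AutoPipelineForInpainting.from_pretrained(
+#         "runwayml/stable-diffusion-v1-5", enable_pag=True
+#     )
+#     image = pipe(
+#         "A majestic tiger sitting on a bench",
+#         image=init_image,
+#         mask_image=mask_image,
+#         pag_scale=3.0,
+#     ).images[0]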
+
+import gc
+import random
+import unittest
+
+import numpy as np
+import torch
+from PIL import Image
+from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
+
+from diffusers import (
+    AutoencoderKL,
+    AutoPipelineForInpainting,
+    PNDMScheduler,
+    StableDiffusionPAGInpaintPipeline,
+    UNet2DConditionModel,
+)
+
+from ...testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    floats_tensor,
+    load_image,
+    require_torch_accelerator,
+    slow,
+    torch_device,
+)
+from ..pipeline_params import (
+    TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS,
+    TEXT_GUIDED_IMAGE_INPAINTING_PARAMS,
+    TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS,
+)
+from ..test_pipelines_common import (
+    IPAdapterTesterMixin,
+    PipelineFromPipeTesterMixin,
+    PipelineLatentTesterMixin,
+    PipelineTesterMixin,
+)
+
+
+enable_full_determinism()
+
+
+class StableDiffusionPAGInpaintPipelineFastTests(
+    PipelineTesterMixin,
+    IPAdapterTesterMixin,
+    PipelineLatentTesterMixin,
+    PipelineFromPipeTesterMixin,
+    unittest.TestCase,
+):
+    pipeline_class = StableDiffusionPAGInpaintPipeline
+    params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS.union({"pag_scale", "pag_adaptive_scale"})
+    batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS
+    image_params = frozenset([])
+    image_latents_params = frozenset([])
+    callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union(
+        {"add_text_embeds", "add_time_ids", "mask", "masked_image_latents"}
+    )
+
+    def get_dummy_components(self, time_cond_proj_dim=None):
+        torch.manual_seed(0)
+        unet = UNet2DConditionModel(
+            block_out_channels=(32, 64),
+            time_cond_proj_dim=time_cond_proj_dim,
+            layers_per_block=2,
+            sample_size=32,
+            in_channels=4,
+            out_channels=4,
+            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
+            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
+            cross_attention_dim=32,
+        )
+        scheduler = PNDMScheduler(skip_prk_steps=True)
+        torch.manual_seed(0)
+        vae = AutoencoderKL(
+            block_out_channels=[32, 64],
+            in_channels=3,
+            out_channels=3,
+            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
+            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
+            latent_channels=4,
+        )
+        torch.manual_seed(0)
+        text_encoder_config = CLIPTextConfig(
+            bos_token_id=0,
+            eos_token_id=2,
+            hidden_size=32,
+            intermediate_size=37,
+            layer_norm_eps=1e-05,
+            num_attention_heads=4,
+            num_hidden_layers=5,
+            pad_token_id=1,
+            vocab_size=1000,
+        )
+        text_encoder = CLIPTextModel(text_encoder_config)
+        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+
+        components = {
+            "unet": unet,
+            "scheduler": scheduler,
+            "vae": vae,
+            "text_encoder": text_encoder,
+            "tokenizer": tokenizer,
+            "safety_checker": None,
+            "feature_extractor": None,
+            "image_encoder": None,
+        }
+        return components
+
+    def get_dummy_inputs(self, device, seed=0):
+        # TODO: use tensor inputs instead of PIL, this is here just to leave the old expected_slices untouched
+        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
+        image = image.cpu().permute(0, 2, 3, 1)[0]
+        init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64))
+        # create mask
+        image[8:, 8:, :] = 255
+        mask_image = Image.fromarray(np.uint8(image)).convert("L").resize((64, 64))
+
+        if str(device).startswith("mps"):
+            generator = torch.manual_seed(seed)
+        else:
+            generator = torch.Generator(device=device).manual_seed(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "image": init_image,
+            "mask_image": mask_image,
+            "generator": generator,
+            "num_inference_steps": 2,
+            "guidance_scale": 6.0,
+            "strength": 1.0,
+            "pag_scale": 0.9,
+            "output_type": "np",
+        }
+        return inputs
+
+    def test_pag_applied_layers(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+
+        # base pipeline
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(device)
+        pipe.set_progress_bar_config(disable=None)
+
+        # pag_applied_layers = ["mid", "up", "down"] should apply to all self-attention layers
+        all_self_attn_layers = [k for k in pipe.unet.attn_processors.keys() if "attn1" in k]
+        original_attn_procs = pipe.unet.attn_processors
+        pag_layers = [
+            "down",
+            "mid",
+            "up",
+        ]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert set(pipe.pag_attn_processors) == set(all_self_attn_layers)
+
+        # pag_applied_layers = ["mid"], ["mid_block"] or ["mid_block.attentions.0"] should all
+        # apply to the only self-attention layer in mid_block, i.e.
+        # mid_block.attentions.0.transformer_blocks.0.attn1.processor
+        all_self_attn_mid_layers = [
+            "mid_block.attentions.0.transformer_blocks.0.attn1.processor",
+        ]
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["mid"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers)
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["mid_block"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers)
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["mid_block.attentions.0"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers)
+
+        # pag_applied_layers = ["mid_block.attentions.1"] does not exist in the model
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["mid_block.attentions.1"]
+        with self.assertRaises(ValueError):
+            pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+
+        # pag_applied_layers = ["down"] should apply to all self-attention layers in down_blocks, i.e.
+        # down_blocks.1.attentions.0.transformer_blocks.0.attn1.processor
+        # down_blocks.1.attentions.1.transformer_blocks.0.attn1.processor
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["down"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert len(pipe.pag_attn_processors) == 2
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["down_blocks.0"]
+        with self.assertRaises(ValueError):
+            pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["down_blocks.1"]
+        pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False)
+        assert len(pipe.pag_attn_processors) == 2
+
+        pipe.unet.set_attn_processor(original_attn_procs.copy())
+        pag_layers = ["down_blocks.1.attentions.1"]
pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert len(pipe.pag_attn_processors) == 1 + + def test_pag_inference(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe_pag(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == ( + 1, + 64, + 64, + 3, + ), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}" + + expected_slice = np.array([0.7190, 0.5807, 0.6007, 0.5600, 0.6350, 0.6639, 0.5680, 0.5664, 0.5230]) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}" + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict, atol=1e-3, rtol=1e-3) + + +@slow +@require_torch_accelerator +class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase): + pipeline_class = StableDiffusionPAGInpaintPipeline + repo_id = "runwayml/stable-diffusion-v1-5" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", seed=0, guidance_scale=7.0): + img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png" + mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png" + + init_image = load_image(img_url).convert("RGB") + mask_image = load_image(mask_url).convert("RGB") + + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "A majestic tiger sitting on a bench", + "generator": generator, + "image": init_image, + "mask_image": mask_image, + "strength": 0.8, + "num_inference_steps": 3, + "guidance_scale": guidance_scale, + "pag_scale": 3.0, + "output_type": "np", + } + return inputs + + def test_pag_cfg(self): + pipeline = AutoPipelineForInpainting.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 512, 512, 3) + + expected_slice = np.array( + [0.38793945, 0.4111328, 0.47924805, 0.39208984, 0.4165039, 0.41674805, 0.37060547, 0.36791992, 0.40625] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) + + def test_pag_uncond(self): + pipeline = AutoPipelineForInpainting.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = 
self.get_inputs(torch_device, guidance_scale=0.0) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array( + [0.3876953, 0.40356445, 0.4934082, 0.39697266, 0.41674805, 0.41015625, 0.375, 0.36914062, 0.40649414] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sdxl.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sdxl.py new file mode 100644 index 0000000000000000000000000000000000000000..cca5c61651b34b9d62101937687a3017f8db7ad7 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sdxl.py @@ -0,0 +1,353 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import inspect +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + AutoPipelineForText2Image, + EulerDiscreteScheduler, + StableDiffusionXLPAGPipeline, + StableDiffusionXLPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + TEXT_TO_IMAGE_BATCH_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, + TEXT_TO_IMAGE_IMAGE_PARAMS, + TEXT_TO_IMAGE_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineFromPipeTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionXLPAGPipelineFastTests( + PipelineTesterMixin, + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineFromPipeTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionXLPAGPipeline + params = TEXT_TO_IMAGE_PARAMS.union({"pag_scale", "pag_adaptive_scale"}) + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"add_text_embeds", "add_time_ids"}) + + def get_dummy_components(self, time_cond_proj_dim=None): + # Copied from tests.pipelines.stable_diffusion_xl.test_stable_diffusion_xl.StableDiffusionXLPipelineFastTests.get_dummy_components + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(2, 4), + layers_per_block=2, + time_cond_proj_dim=time_cond_proj_dim, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + 
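+            # 80 = 6 * addition_time_embed_dim (8) + projection_dim (32): SDXL's added
+            # time ids carry original_size, crops_coords_top_left and target_size,
+            # two entries each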
projection_class_embeddings_input_dim=80, # 6 * 8 + 32 + cross_attention_dim=64, + norm_num_groups=1, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=32, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + "image_encoder": None, + "feature_extractor": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "pag_scale": 0.9, + "output_type": "np", + } + return inputs + + def test_pag_disable_enable(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline (expect same output when pag is disabled) + pipe_sd = StableDiffusionXLPipeline(**components) + pipe_sd = pipe_sd.to(device) + pipe_sd.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["pag_scale"] + assert "pag_scale" not in inspect.signature(pipe_sd.__call__).parameters, ( + f"`pag_scale` should not be a call parameter of the base pipeline {pipe_sd.__class__.__name__}." 
+ ) + out = pipe_sd(**inputs).images[0, -3:, -3:, -1] + + # pag disabled with pag_scale=0.0 + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["pag_scale"] = 0.0 + out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + # pag enabled + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + out_pag_enabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 + assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 + + def test_pag_applied_layers(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # base pipeline + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + # pag_applied_layers = ["mid","up","down"] should apply to all self-attention layers + all_self_attn_layers = [k for k in pipe.unet.attn_processors.keys() if "attn1" in k] + original_attn_procs = pipe.unet.attn_processors + pag_layers = ["mid", "down", "up"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_layers) + + # pag_applied_layers = ["mid"], or ["mid.block_0"] or ["mid.block_0.attentions_0"] should apply to all self-attention layers in mid_block, i.e. + # mid_block.attentions.0.transformer_blocks.0.attn1.processor + # mid_block.attentions.0.transformer_blocks.1.attn1.processor + all_self_attn_mid_layers = [ + "mid_block.attentions.0.transformer_blocks.0.attn1.processor", + "mid_block.attentions.0.transformer_blocks.1.attn1.processor", + ] + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["mid"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers) + + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["mid_block"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers) + + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["mid_block.attentions.0"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert set(pipe.pag_attn_processors) == set(all_self_attn_mid_layers) + + # pag_applied_layers = ["mid.block_0.attentions_1"] does not exist in the model + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["mid_block.attentions.1"] + with self.assertRaises(ValueError): + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + + # pag_applied_layers = "down" should apply to all self-attention layers in down_blocks + # down_blocks.1.attentions.0.transformer_blocks.0.attn1.processor + # down_blocks.1.attentions.0.transformer_blocks.1.attn1.processor + # down_blocks.1.attentions.1.transformer_blocks.0.attn1.processor + # down_blocks.1.attentions.1.transformer_blocks.1.attn1.processor + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["down"] + 
pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert len(pipe.pag_attn_processors) == 4 + + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["down_blocks.0"] + with self.assertRaises(ValueError): + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["down_blocks.1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert len(pipe.pag_attn_processors) == 4 + + pipe.unet.set_attn_processor(original_attn_procs.copy()) + pag_layers = ["down_blocks.1.attentions.1"] + pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + assert len(pipe.pag_attn_processors) == 2 + + def test_pag_inference(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe_pag(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == ( + 1, + 64, + 64, + 3, + ), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}" + expected_slice = np.array([0.5382, 0.5439, 0.4704, 0.4569, 0.5234, 0.4834, 0.5289, 0.5039, 0.4764]) + + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + + +@slow +@require_torch_accelerator +class StableDiffusionXLPAGPipelineIntegrationTests(unittest.TestCase): + pipeline_class = StableDiffusionXLPAGPipeline + repo_id = "stabilityai/stable-diffusion-xl-base-1.0" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", seed=0, guidance_scale=7.0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "a polar bear sitting in a chair drinking a milkshake", + "negative_prompt": "deformed, ugly, wrong proportion, low res, bad anatomy, worst quality, low quality", + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": guidance_scale, + "pag_scale": 3.0, + "output_type": "np", + } + return inputs + + def test_pag_cfg(self): + pipeline = AutoPipelineForText2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 1024, 1024, 3) + expected_slice = np.array( + [0.3123679, 0.31725878, 0.32026544, 0.327533, 0.3266391, 0.3303998, 0.33544615, 0.34181812, 0.34102726] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) + + def test_pag_uncond(self): + pipeline = AutoPipelineForText2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + 
pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device, guidance_scale=0.0) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 1024, 1024, 3) + expected_slice = np.array( + [0.47400922, 0.48650584, 0.4839625, 0.4724013, 0.4890427, 0.49544555, 0.51707107, 0.54299414, 0.5224372] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sdxl_img2img.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sdxl_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..d311500d3ca7f90ad4188afaa385ee24d649f3e4 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sdxl_img2img.py @@ -0,0 +1,338 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import inspect +import random +import unittest + +import numpy as np +import torch +from transformers import ( + CLIPImageProcessor, + CLIPTextConfig, + CLIPTextModel, + CLIPTextModelWithProjection, + CLIPTokenizer, + CLIPVisionConfig, + CLIPVisionModelWithProjection, +) + +from diffusers import ( + AutoencoderKL, + AutoPipelineForImage2Image, + EulerDiscreteScheduler, + StableDiffusionXLImg2ImgPipeline, + StableDiffusionXLPAGImg2ImgPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + floats_tensor, + load_image, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineFromPipeTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionXLPAGImg2ImgPipelineFastTests( + PipelineTesterMixin, + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineFromPipeTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionXLPAGImg2ImgPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS.union({"pag_scale", "pag_adaptive_scale"}) - {"height", "width"} + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union( + {"add_text_embeds", "add_time_ids", "add_neg_time_ids"} + ) + + supports_dduf = False + + # based on tests.pipelines.stable_diffusion_xl.test_stable_diffusion_xl_img2img_pipeline.get_dummy_components + def get_dummy_components( + self, skip_first_text_encoder=False, time_cond_proj_dim=None, requires_aesthetics_score=False + ): + torch.manual_seed(0) + unet = UNet2DConditionModel( + 
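+            # 72 = 5 * addition_time_embed_dim (8) + projection_dim (32): with
+            # requires_aesthetics_score the aesthetic score replaces the target_size
+            # pair in the added time ids; otherwise 6 * 8 + 32 = 80, as in the text2img tests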
block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + time_cond_proj_dim=time_cond_proj_dim, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + projection_class_embeddings_input_dim=72 if requires_aesthetics_score else 80, # 5 * 8 + 32 + cross_attention_dim=64 if not skip_first_text_encoder else 32, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + image_encoder_config = CLIPVisionConfig( + hidden_size=32, + image_size=224, + projection_dim=32, + intermediate_size=37, + num_attention_heads=4, + num_channels=3, + num_hidden_layers=5, + patch_size=14, + ) + + image_encoder = CLIPVisionModelWithProjection(image_encoder_config) + + feature_extractor = CLIPImageProcessor( + crop_size=224, + do_center_crop=True, + do_normalize=True, + do_resize=True, + image_mean=[0.48145466, 0.4578275, 0.40821073], + image_std=[0.26862954, 0.26130258, 0.27577711], + resample=3, + size=224, + ) + + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=32, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder if not skip_first_text_encoder else None, + "tokenizer": tokenizer if not skip_first_text_encoder else None, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + "requires_aesthetics_score": requires_aesthetics_score, + "image_encoder": image_encoder, + "feature_extractor": feature_extractor, + } + return components + + # based on tests.pipelines.stable_diffusion_xl.test_stable_diffusion_xl_img2img_pipeline.StableDiffusionXLImg2ImgPipelineFastTests + # add `pag_scale` to the inputs + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image / 2 + 0.5 + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "pag_scale": 3.0, + "output_type": "np", + "strength": 0.8, + } + return inputs + + def test_pag_disable_enable(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = 
self.get_dummy_components(requires_aesthetics_score=True)
+
+        # base pipeline
+        pipe_sd = StableDiffusionXLImg2ImgPipeline(**components)
+        pipe_sd = pipe_sd.to(device)
+        pipe_sd.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        del inputs["pag_scale"]
+        assert "pag_scale" not in inspect.signature(pipe_sd.__call__).parameters, (
+            f"`pag_scale` should not be a call parameter of the base pipeline {pipe_sd.__class__.__name__}."
+        )
+        out = pipe_sd(**inputs).images[0, -3:, -3:, -1]
+
+        # pag disabled with pag_scale=0.0
+        pipe_pag = self.pipeline_class(**components)
+        pipe_pag = pipe_pag.to(device)
+        pipe_pag.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        inputs["pag_scale"] = 0.0
+        out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1]
+
+        # pag enabled
+        pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"])
+        pipe_pag = pipe_pag.to(device)
+        pipe_pag.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        out_pag_enabled = pipe_pag(**inputs).images[0, -3:, -3:, -1]
+
+        assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3
+        assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3
+
+    def test_pag_inference(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components(requires_aesthetics_score=True)
+
+        pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"])
+        pipe_pag = pipe_pag.to(device)
+        pipe_pag.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = pipe_pag(**inputs).images
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (
+            1,
+            32,
+            32,
+            3,
+        ), f"the shape of the output image should be (1, 32, 32, 3) but got {image.shape}"
+        expected_slice = np.array([0.4613, 0.4902, 0.4406, 0.6788, 0.5611, 0.4529, 0.5893, 0.5975, 0.5226])
+
+        max_diff = np.abs(image_slice.flatten() - expected_slice).max()
+        assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}"
+
+    @unittest.skip("We test this functionality elsewhere already.")
+    def test_save_load_optional_components(self):
+        pass
+
+
+@slow
+@require_torch_accelerator
+class StableDiffusionXLPAGImg2ImgPipelineIntegrationTests(unittest.TestCase):
+    repo_id = "stabilityai/stable-diffusion-xl-base-1.0"
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def get_inputs(self, device, generator_device="cpu", seed=0, guidance_scale=7.0):
+        img_url = (
+            "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png"
+        )
+
+        init_image = load_image(img_url)
+
+        generator = torch.Generator(device=generator_device).manual_seed(seed)
+        inputs = {
+            "prompt": "a dog catching a frisbee in the jungle",
+            "generator": generator,
+            "image": init_image,
+            "strength": 0.8,
+            "num_inference_steps": 3,
+            "guidance_scale": guidance_scale,
+            "pag_scale": 3.0,
+            "output_type": "np",
+        }
+        return inputs
+
+    def test_pag_cfg(self):
+        pipeline = AutoPipelineForImage2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
+        pipeline.enable_model_cpu_offload(device=torch_device)
+        pipeline.set_progress_bar_config(disable=None)
+
+        inputs = self.get_inputs(torch_device)
+        image = pipeline(**inputs).images
+
+        image_slice = image[0,
-3:, -3:, -1].flatten() + assert image.shape == (1, 1024, 1024, 3) + expected_slice = np.array( + [0.20301354, 0.21078318, 0.2021082, 0.20277798, 0.20681083, 0.19562206, 0.20121682, 0.21562952, 0.21277016] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) + + def test_pag_uncond(self): + pipeline = AutoPipelineForImage2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device, guidance_scale=0.0) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 1024, 1024, 3) + expected_slice = np.array( + [0.21303111, 0.22188407, 0.2124992, 0.21365267, 0.18823743, 0.17569828, 0.21113116, 0.19419771, 0.18919235] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sdxl_inpaint.py b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sdxl_inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..00a07582e205dd9fe1a3bdf024655d3b8c87929b --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pag/test_pag_sdxl_inpaint.py @@ -0,0 +1,344 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
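+
+# Contract exercised by the fast tests below (sketch): the plain
+# StableDiffusionXLInpaintPipeline rejects `pag_scale` as a call argument, the PAG
+# variant called with pag_scale=0.0 should reproduce the plain pipeline's output to
+# within ~1e-3, and any non-zero pag_scale should measurably change the image:
+#
+#     pipe = StableDiffusionXLPAGInpaintPipeline(**components, pag_applied_layers=["mid", "up", "down"])
+#     images = pipe(**inputs).images  # `inputs` includes `pag_scale`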
+ +import gc +import inspect +import random +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import ( + CLIPImageProcessor, + CLIPTextConfig, + CLIPTextModel, + CLIPTextModelWithProjection, + CLIPTokenizer, + CLIPVisionConfig, + CLIPVisionModelWithProjection, +) + +from diffusers import ( + AutoencoderKL, + AutoPipelineForInpainting, + EulerDiscreteScheduler, + StableDiffusionXLInpaintPipeline, + StableDiffusionXLPAGInpaintPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + floats_tensor, + load_image, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_INPAINTING_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineFromPipeTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionXLPAGInpaintPipelineFastTests( + PipelineTesterMixin, + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineFromPipeTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionXLPAGInpaintPipeline + params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS.union({"pag_scale", "pag_adaptive_scale"}) + batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS + image_params = frozenset([]) + image_latents_params = frozenset([]) + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union( + {"add_text_embeds", "add_time_ids", "mask", "masked_image_latents"} + ) + + supports_dduf = False + + # based on tests.pipelines.stable_diffusion_xl.test_stable_diffusion_xl_inpaint.StableDiffusionXLInpaintPipelineFastTests.get_dummy_components + def get_dummy_components( + self, skip_first_text_encoder=False, time_cond_proj_dim=None, requires_aesthetics_score=False + ): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + time_cond_proj_dim=time_cond_proj_dim, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + projection_class_embeddings_input_dim=72 if requires_aesthetics_score else 80, # 5 * 8 + 32 + cross_attention_dim=64 if not skip_first_text_encoder else 32, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=32, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + text_encoder_2 = 
CLIPTextModelWithProjection(text_encoder_config) + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + torch.manual_seed(0) + image_encoder_config = CLIPVisionConfig( + hidden_size=32, + image_size=224, + projection_dim=32, + intermediate_size=37, + num_attention_heads=4, + num_channels=3, + num_hidden_layers=5, + patch_size=14, + ) + + image_encoder = CLIPVisionModelWithProjection(image_encoder_config) + + feature_extractor = CLIPImageProcessor( + crop_size=224, + do_center_crop=True, + do_normalize=True, + do_resize=True, + image_mean=[0.48145466, 0.4578275, 0.40821073], + image_std=[0.26862954, 0.26130258, 0.27577711], + resample=3, + size=224, + ) + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder if not skip_first_text_encoder else None, + "tokenizer": tokenizer if not skip_first_text_encoder else None, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + "image_encoder": image_encoder, + "feature_extractor": feature_extractor, + "requires_aesthetics_score": requires_aesthetics_score, + } + return components + + def get_dummy_inputs(self, device, seed=0): + # TODO: use tensor inputs instead of PIL, this is here just to leave the old expected_slices untouched + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image.cpu().permute(0, 2, 3, 1)[0] + init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + # create mask + image[8:, 8:, :] = 255 + mask_image = Image.fromarray(np.uint8(image)).convert("L").resize((64, 64)) + + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": init_image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "strength": 1.0, + "pag_scale": 0.9, + "output_type": "np", + } + return inputs + + def test_pag_disable_enable(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(requires_aesthetics_score=True) + + # base pipeline + pipe_sd = StableDiffusionXLInpaintPipeline(**components) + pipe_sd = pipe_sd.to(device) + pipe_sd.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["pag_scale"] + assert "pag_scale" not in inspect.signature(pipe_sd.__call__).parameters, ( + f"`pag_scale` should not be a call parameter of the base pipeline {pipe_sd.__class__.__name__}." 
+ ) + out = pipe_sd(**inputs).images[0, -3:, -3:, -1] + + # pag disabled with pag_scale=0.0 + pipe_pag = self.pipeline_class(**components) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["pag_scale"] = 0.0 + out_pag_disabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + # pag enabled + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + out_pag_enabled = pipe_pag(**inputs).images[0, -3:, -3:, -1] + + assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 + assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 + + def test_pag_inference(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(requires_aesthetics_score=True) + + pipe_pag = self.pipeline_class(**components, pag_applied_layers=["mid", "up", "down"]) + pipe_pag = pipe_pag.to(device) + pipe_pag.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe_pag(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == ( + 1, + 64, + 64, + 3, + ), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}" + expected_slice = np.array([0.8366, 0.5513, 0.6105, 0.6213, 0.6957, 0.7400, 0.6614, 0.6102, 0.5239]) + + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}" + + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + + +@slow +@require_torch_accelerator +class StableDiffusionXLPAGInpaintPipelineIntegrationTests(unittest.TestCase): + repo_id = "stabilityai/stable-diffusion-xl-base-1.0" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", seed=0, guidance_scale=7.0): + img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png" + mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png" + + init_image = load_image(img_url).convert("RGB") + mask_image = load_image(mask_url).convert("RGB") + + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "A majestic tiger sitting on a bench", + "generator": generator, + "image": init_image, + "mask_image": mask_image, + "strength": 0.8, + "num_inference_steps": 3, + "guidance_scale": guidance_scale, + "pag_scale": 3.0, + "output_type": "np", + } + return inputs + + def test_pag_cfg(self): + pipeline = AutoPipelineForInpainting.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 1024, 1024, 3) + expected_slice = np.array( + [0.41385046, 0.39608297, 0.4360491, 0.26872507, 0.32187328, 0.4242474, 0.2603805, 0.34167895, 0.46561807] + ) + assert 
np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) + + def test_pag_uncond(self): + pipeline = AutoPipelineForInpainting.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16) + pipeline.enable_model_cpu_offload(device=torch_device) + pipeline.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device, guidance_scale=0.0) + image = pipeline(**inputs).images + + image_slice = image[0, -3:, -3:, -1].flatten() + assert image.shape == (1, 1024, 1024, 3) + expected_slice = np.array( + [0.41597816, 0.39302617, 0.44287828, 0.2687074, 0.28315824, 0.40582314, 0.20877528, 0.2380802, 0.39447647] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3, ( + f"output is different from expected, {image_slice.flatten()}" + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/pixart_alpha/__init__.py b/pythonProject/diffusers-main/tests/pipelines/pixart_alpha/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/pixart_alpha/test_pixart.py b/pythonProject/diffusers-main/tests/pipelines/pixart_alpha/test_pixart.py new file mode 100644 index 0000000000000000000000000000000000000000..fd41c9887dcccf67c3324cc03eb6ef29990eefa7 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pixart_alpha/test_pixart.py @@ -0,0 +1,375 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
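+
+# Prompt-embedding path exercised below (sketch; the variable names are illustrative):
+# `encode_prompt` returns the positive and negative embeddings plus their attention
+# masks, which can be passed in place of a raw prompt string:
+#
+#     embeds, mask, neg_embeds, neg_mask = pipe.encode_prompt(prompt)
+#     image = pipe(
+#         prompt_embeds=embeds,
+#         prompt_attention_mask=mask,
+#         negative_prompt=None,
+#         negative_prompt_embeds=neg_embeds,
+#         negative_prompt_attention_mask=neg_mask,
+#     ).images[0]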
+ +import gc +import tempfile +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + PixArtAlphaPipeline, + PixArtTransformer2DModel, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = PixArtAlphaPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + + required_optional_params = PipelineTesterMixin.required_optional_params + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = PixArtTransformer2DModel( + sample_size=8, + num_layers=2, + patch_size=2, + attention_head_dim=8, + num_attention_heads=3, + caption_channels=32, + in_channels=4, + cross_attention_dim=24, + out_channels=8, + attention_bias=True, + activation_fn="gelu-approximate", + num_embeds_ada_norm=1000, + norm_type="ada_norm_single", + norm_elementwise_affine=False, + norm_eps=1e-6, + ) + torch.manual_seed(0) + vae = AutoencoderKL() + + scheduler = DDIMScheduler() + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + components = { + "transformer": transformer.eval(), + "vae": vae.eval(), + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "use_resolution_binning": False, + "output_type": "np", + } + return inputs + + @unittest.skip("Not supported.") + def test_sequential_cpu_offload_forward_pass(self): + # TODO(PVP, Sayak) need to fix later + return + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + self.assertEqual(image.shape, (1, 8, 8, 3)) + expected_slice = np.array([0.6319, 0.3526, 0.3806, 0.6327, 0.4639, 0.483, 0.2583, 0.5331, 0.4852]) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + def test_inference_non_square_images(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs, height=32, width=48).images + image_slice = image[0, -3:, -3:, -1] + self.assertEqual(image.shape, (1, 32, 48, 3)) + + expected_slice = np.array([0.6493, 0.537, 
0.4081, 0.4762, 0.3695, 0.4711, 0.3026, 0.5218, 0.5263]) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + @unittest.skip("Test is already covered through encode_prompt isolation.") + def test_save_load_optional_components(self): + pass + + def test_inference_with_embeddings_and_multiple_images(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + prompt = inputs["prompt"] + generator = inputs["generator"] + num_inference_steps = inputs["num_inference_steps"] + output_type = inputs["output_type"] + + prompt_embeds, prompt_attn_mask, negative_prompt_embeds, neg_prompt_attn_mask = pipe.encode_prompt(prompt) + + # inputs with prompt converted to embeddings + inputs = { + "prompt_embeds": prompt_embeds, + "prompt_attention_mask": prompt_attn_mask, + "negative_prompt": None, + "negative_prompt_embeds": negative_prompt_embeds, + "negative_prompt_attention_mask": neg_prompt_attn_mask, + "generator": generator, + "num_inference_steps": num_inference_steps, + "output_type": output_type, + "num_images_per_prompt": 2, + "use_resolution_binning": False, + } + + # set all optional components to None + for optional_component in pipe._optional_components: + setattr(pipe, optional_component, None) + + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for optional_component in pipe._optional_components: + self.assertTrue( + getattr(pipe_loaded, optional_component) is None, + f"`{optional_component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(torch_device) + + generator = inputs["generator"] + num_inference_steps = inputs["num_inference_steps"] + output_type = inputs["output_type"] + + # inputs with prompt converted to embeddings + inputs = { + "prompt_embeds": prompt_embeds, + "prompt_attention_mask": prompt_attn_mask, + "negative_prompt": None, + "negative_prompt_embeds": negative_prompt_embeds, + "negative_prompt_attention_mask": neg_prompt_attn_mask, + "generator": generator, + "num_inference_steps": num_inference_steps, + "output_type": output_type, + "num_images_per_prompt": 2, + "use_resolution_binning": False, + } + + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess(max_diff, 1e-4) + + def test_inference_with_multiple_images_per_prompt(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["num_images_per_prompt"] = 2 + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + self.assertEqual(image.shape, (2, 8, 8, 3)) + expected_slice = np.array([0.6319, 0.3526, 0.3806, 0.6327, 0.4639, 0.483, 0.2583, 0.5331, 0.4852]) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + def test_raises_warning_for_mask_feature(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + 
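# `mask_feature` is deprecated; passing it should surface a FutureWarning
+ # (asserted below) while the call itself still succeeds.
+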
inputs.update({"mask_feature": True}) + + with self.assertWarns(FutureWarning) as warning_ctx: + _ = pipe(**inputs).images + + assert "mask_feature" in str(warning_ctx.warning) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=1e-3) + + +@slow +@require_torch_accelerator +class PixArtAlphaPipelineIntegrationTests(unittest.TestCase): + ckpt_id_1024 = "PixArt-alpha/PixArt-XL-2-1024-MS" + ckpt_id_512 = "PixArt-alpha/PixArt-XL-2-512x512" + prompt = "A small cactus with a happy face in the Sahara desert." + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_pixart_1024(self): + generator = torch.Generator("cpu").manual_seed(0) + + pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16) + pipe.enable_model_cpu_offload(device=torch_device) + prompt = self.prompt + + image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images + + image_slice = image[0, -3:, -3:, -1] + expected_slice = np.array([0.0742, 0.0835, 0.2114, 0.0295, 0.0784, 0.2361, 0.1738, 0.2251, 0.3589]) + + max_diff = numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice) + self.assertLessEqual(max_diff, 1e-4) + + def test_pixart_512(self): + generator = torch.Generator("cpu").manual_seed(0) + + pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_512, torch_dtype=torch.float16) + pipe.enable_model_cpu_offload(device=torch_device) + + prompt = self.prompt + + image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images + + image_slice = image[0, -3:, -3:, -1] + expected_slice = np.array([0.3477, 0.3882, 0.4541, 0.3413, 0.3821, 0.4463, 0.4001, 0.4409, 0.4958]) + + max_diff = numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice) + self.assertLessEqual(max_diff, 1e-4) + + def test_pixart_1024_without_resolution_binning(self): + generator = torch.manual_seed(0) + + pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16) + pipe.enable_model_cpu_offload(device=torch_device) + + prompt = self.prompt + height, width = 1024, 768 + num_inference_steps = 2 + + image = pipe( + prompt, + height=height, + width=width, + generator=generator, + num_inference_steps=num_inference_steps, + output_type="np", + ).images + image_slice = image[0, -3:, -3:, -1] + + generator = torch.manual_seed(0) + no_res_bin_image = pipe( + prompt, + height=height, + width=width, + generator=generator, + num_inference_steps=num_inference_steps, + output_type="np", + use_resolution_binning=False, + ).images + no_res_bin_image_slice = no_res_bin_image[0, -3:, -3:, -1] + + assert not np.allclose(image_slice, no_res_bin_image_slice, atol=1e-4, rtol=1e-4) + + def test_pixart_512_without_resolution_binning(self): + generator = torch.manual_seed(0) + + pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_512, torch_dtype=torch.float16) + pipe.enable_model_cpu_offload(device=torch_device) + + prompt = self.prompt + height, width = 512, 768 + num_inference_steps = 2 + + image = pipe( + prompt, + height=height, + width=width, + generator=generator, + num_inference_steps=num_inference_steps, + output_type="np", + ).images + image_slice = image[0, -3:, -3:, -1] + + generator = torch.manual_seed(0) + no_res_bin_image = pipe( + prompt, + height=height, + width=width, + generator=generator, + 
num_inference_steps=num_inference_steps, + output_type="np", + use_resolution_binning=False, + ).images + no_res_bin_image_slice = no_res_bin_image[0, -3:, -3:, -1] + + assert not np.allclose(image_slice, no_res_bin_image_slice, atol=1e-4, rtol=1e-4) diff --git a/pythonProject/diffusers-main/tests/pipelines/pixart_sigma/__init__.py b/pythonProject/diffusers-main/tests/pipelines/pixart_sigma/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/pixart_sigma/test_pixart.py b/pythonProject/diffusers-main/tests/pipelines/pixart_sigma/test_pixart.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb80df81adf9ebf474a29685a3b98d76b3f7d82 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pixart_sigma/test_pixart.py @@ -0,0 +1,414 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import tempfile +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + PixArtSigmaPipeline, + PixArtTransformer2DModel, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import ( + PipelineTesterMixin, + check_qkv_fusion_matches_attn_procs_length, + check_qkv_fusion_processors_exist, + to_np, +) + + +enable_full_determinism() + + +class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = PixArtSigmaPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + + required_optional_params = PipelineTesterMixin.required_optional_params + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = PixArtTransformer2DModel( + sample_size=8, + num_layers=2, + patch_size=2, + attention_head_dim=8, + num_attention_heads=3, + caption_channels=32, + in_channels=4, + cross_attention_dim=24, + out_channels=8, + attention_bias=True, + activation_fn="gelu-approximate", + num_embeds_ada_norm=1000, + norm_type="ada_norm_single", + norm_elementwise_affine=False, + norm_eps=1e-6, + ) + torch.manual_seed(0) + vae = AutoencoderKL() + + scheduler = DDIMScheduler() + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + components = { + "transformer": transformer.eval(), + "vae": vae.eval(), + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, 
+ } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "use_resolution_binning": False, + "output_type": "np", + } + return inputs + + @unittest.skip("Not supported.") + def test_sequential_cpu_offload_forward_pass(self): + # TODO(PVP, Sayak) need to fix later + return + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + self.assertEqual(image.shape, (1, 8, 8, 3)) + expected_slice = np.array([0.6319, 0.3526, 0.3806, 0.6327, 0.4639, 0.4830, 0.2583, 0.5331, 0.4852]) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + def test_inference_non_square_images(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs, height=32, width=48).images + image_slice = image[0, -3:, -3:, -1] + self.assertEqual(image.shape, (1, 32, 48, 3)) + + expected_slice = np.array([0.6493, 0.5370, 0.4081, 0.4762, 0.3695, 0.4711, 0.3026, 0.5218, 0.5263]) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + def test_inference_with_embeddings_and_multiple_images(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + prompt = inputs["prompt"] + generator = inputs["generator"] + num_inference_steps = inputs["num_inference_steps"] + output_type = inputs["output_type"] + + prompt_embeds, prompt_attn_mask, negative_prompt_embeds, neg_prompt_attn_mask = pipe.encode_prompt(prompt) + + # inputs with prompt converted to embeddings + inputs = { + "prompt_embeds": prompt_embeds, + "prompt_attention_mask": prompt_attn_mask, + "negative_prompt": None, + "negative_prompt_embeds": negative_prompt_embeds, + "negative_prompt_attention_mask": neg_prompt_attn_mask, + "generator": generator, + "num_inference_steps": num_inference_steps, + "output_type": output_type, + "num_images_per_prompt": 2, + "use_resolution_binning": False, + } + + # set all optional components to None + for optional_component in pipe._optional_components: + setattr(pipe, optional_component, None) + + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for optional_component in pipe._optional_components: + self.assertTrue( + getattr(pipe_loaded, optional_component) is None, + f"`{optional_component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(torch_device) + + generator = inputs["generator"] + num_inference_steps = inputs["num_inference_steps"] + output_type = inputs["output_type"] + + # inputs with prompt converted to 
embeddings + inputs = { + "prompt_embeds": prompt_embeds, + "prompt_attention_mask": prompt_attn_mask, + "negative_prompt": None, + "negative_prompt_embeds": negative_prompt_embeds, + "negative_prompt_attention_mask": neg_prompt_attn_mask, + "generator": generator, + "num_inference_steps": num_inference_steps, + "output_type": output_type, + "num_images_per_prompt": 2, + "use_resolution_binning": False, + } + + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess(max_diff, 1e-4) + + def test_inference_with_multiple_images_per_prompt(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["num_images_per_prompt"] = 2 + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + self.assertEqual(image.shape, (2, 8, 8, 3)) + expected_slice = np.array([0.6319, 0.3526, 0.3806, 0.6327, 0.4639, 0.4830, 0.2583, 0.5331, 0.4852]) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + @unittest.skip("Test is already covered through encode_prompt isolation.") + def test_save_load_optional_components(self): + pass + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=1e-3) + + def test_fused_qkv_projections(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + original_image_slice = image[0, -3:, -3:, -1] + + # TODO (sayakpaul): will refactor this once `fuse_qkv_projections()` has been added + # to the pipeline level. + pipe.transformer.fuse_qkv_projections() + assert check_qkv_fusion_processors_exist(pipe.transformer), ( + "Something wrong with the fused attention processors. Expected all the attention processors to be fused." + ) + assert check_qkv_fusion_matches_attn_procs_length( + pipe.transformer, pipe.transformer.original_attn_processors + ), "Something wrong with the attention processors concerning the fused QKV projections." + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice_fused = image[0, -3:, -3:, -1] + + pipe.transformer.unfuse_qkv_projections() + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice_disabled = image[0, -3:, -3:, -1] + + assert np.allclose(original_image_slice, image_slice_fused, atol=1e-3, rtol=1e-3), ( + "Fusion of QKV projections shouldn't affect the outputs." + ) + assert np.allclose(image_slice_fused, image_slice_disabled, atol=1e-3, rtol=1e-3), ( + "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled." + ) + assert np.allclose(original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2), ( + "Original outputs should match when fused QKV projections are disabled." + ) + + +@slow +@require_torch_accelerator +class PixArtSigmaPipelineIntegrationTests(unittest.TestCase): + ckpt_id_1024 = "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS" + ckpt_id_512 = "PixArt-alpha/PixArt-Sigma-XL-2-512-MS" + prompt = "A small cactus with a happy face in the Sahara desert." 
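+
+    # The 512 tests below load only the transformer from `ckpt_id_512`
+    # (via subfolder="transformer") and reuse the remaining 1024 pipeline components.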
+ + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_pixart_1024(self): + generator = torch.Generator("cpu").manual_seed(0) + + pipe = PixArtSigmaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16) + pipe.enable_model_cpu_offload(device=torch_device) + prompt = self.prompt + + image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images + + image_slice = image[0, -3:, -3:, -1] + expected_slice = np.array([0.4517, 0.4446, 0.4375, 0.449, 0.4399, 0.4365, 0.4583, 0.4629, 0.4473]) + + max_diff = numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice) + self.assertLessEqual(max_diff, 1e-4) + + def test_pixart_512(self): + generator = torch.Generator("cpu").manual_seed(0) + + transformer = PixArtTransformer2DModel.from_pretrained( + self.ckpt_id_512, subfolder="transformer", torch_dtype=torch.float16 + ) + pipe = PixArtSigmaPipeline.from_pretrained( + self.ckpt_id_1024, transformer=transformer, torch_dtype=torch.float16 + ) + pipe.enable_model_cpu_offload(device=torch_device) + + prompt = self.prompt + + image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images + + image_slice = image[0, -3:, -3:, -1] + expected_slice = np.array([0.0479, 0.0378, 0.0217, 0.0942, 0.064, 0.0791, 0.2073, 0.1975, 0.2017]) + + max_diff = numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice) + self.assertLessEqual(max_diff, 1e-4) + + def test_pixart_1024_without_resolution_binning(self): + generator = torch.manual_seed(0) + + pipe = PixArtSigmaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16) + pipe.enable_model_cpu_offload(device=torch_device) + + prompt = self.prompt + height, width = 1024, 768 + num_inference_steps = 2 + + image = pipe( + prompt, + height=height, + width=width, + generator=generator, + num_inference_steps=num_inference_steps, + output_type="np", + ).images + image_slice = image[0, -3:, -3:, -1] + + generator = torch.manual_seed(0) + no_res_bin_image = pipe( + prompt, + height=height, + width=width, + generator=generator, + num_inference_steps=num_inference_steps, + output_type="np", + use_resolution_binning=False, + ).images + no_res_bin_image_slice = no_res_bin_image[0, -3:, -3:, -1] + + assert not np.allclose(image_slice, no_res_bin_image_slice, atol=1e-4, rtol=1e-4) + + def test_pixart_512_without_resolution_binning(self): + generator = torch.manual_seed(0) + + transformer = PixArtTransformer2DModel.from_pretrained( + self.ckpt_id_512, subfolder="transformer", torch_dtype=torch.float16 + ) + pipe = PixArtSigmaPipeline.from_pretrained( + self.ckpt_id_1024, transformer=transformer, torch_dtype=torch.float16 + ) + pipe.enable_model_cpu_offload(device=torch_device) + + prompt = self.prompt + height, width = 512, 768 + num_inference_steps = 2 + + image = pipe( + prompt, + height=height, + width=width, + generator=generator, + num_inference_steps=num_inference_steps, + output_type="np", + ).images + image_slice = image[0, -3:, -3:, -1] + + generator = torch.manual_seed(0) + no_res_bin_image = pipe( + prompt, + height=height, + width=width, + generator=generator, + num_inference_steps=num_inference_steps, + output_type="np", + use_resolution_binning=False, + ).images + no_res_bin_image_slice = no_res_bin_image[0, -3:, -3:, -1] + + assert not np.allclose(image_slice, no_res_bin_image_slice, atol=1e-4, rtol=1e-4) diff --git 
a/pythonProject/diffusers-main/tests/pipelines/pndm/__init__.py b/pythonProject/diffusers-main/tests/pipelines/pndm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/pndm/test_pndm.py b/pythonProject/diffusers-main/tests/pipelines/pndm/test_pndm.py new file mode 100644 index 0000000000000000000000000000000000000000..61d6efe88ccd1e75eb28d615389866bd37f365d7 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/pndm/test_pndm.py @@ -0,0 +1,88 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch + +from diffusers import PNDMPipeline, PNDMScheduler, UNet2DModel + +from ...testing_utils import enable_full_determinism, nightly, require_torch, torch_device + + +enable_full_determinism() + + +class PNDMPipelineFastTests(unittest.TestCase): + @property + def dummy_uncond_unet(self): + torch.manual_seed(0) + model = UNet2DModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=3, + out_channels=3, + down_block_types=("DownBlock2D", "AttnDownBlock2D"), + up_block_types=("AttnUpBlock2D", "UpBlock2D"), + ) + return model + + def test_inference(self): + unet = self.dummy_uncond_unet + scheduler = PNDMScheduler() + + pndm = PNDMPipeline(unet=unet, scheduler=scheduler) + pndm.to(torch_device) + pndm.set_progress_bar_config(disable=None) + + generator = torch.manual_seed(0) + image = pndm(generator=generator, num_inference_steps=20, output_type="np").images + + generator = torch.manual_seed(0) + image_from_tuple = pndm(generator=generator, num_inference_steps=20, output_type="np", return_dict=False)[0] + + image_slice = image[0, -3:, -3:, -1] + image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + + +@nightly +@require_torch +class PNDMPipelineIntegrationTests(unittest.TestCase): + def test_inference_cifar10(self): + model_id = "google/ddpm-cifar10-32" + + unet = UNet2DModel.from_pretrained(model_id) + scheduler = PNDMScheduler() + + pndm = PNDMPipeline(unet=unet, scheduler=scheduler) + pndm.to(torch_device) + pndm.set_progress_bar_config(disable=None) + generator = torch.manual_seed(0) + image = pndm(generator=generator, output_type="np").images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.1564, 0.14645, 0.1406, 0.14715, 0.12425, 0.14045, 0.13115, 0.12175, 0.125]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 diff --git a/pythonProject/diffusers-main/tests/pipelines/qwenimage/__init__.py b/pythonProject/diffusers-main/tests/pipelines/qwenimage/__init__.py new file mode 
100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage.py b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage.py new file mode 100644 index 0000000000000000000000000000000000000000..8ebfe7d08bc1ffe319bfdedaa499378f762e623a --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage.py @@ -0,0 +1,236 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch +from transformers import Qwen2_5_VLConfig, Qwen2_5_VLForConditionalGeneration, Qwen2Tokenizer + +from diffusers import ( + AutoencoderKLQwenImage, + FlowMatchEulerDiscreteScheduler, + QwenImagePipeline, + QwenImageTransformer2DModel, +) + +from ...testing_utils import enable_full_determinism, torch_device +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class QwenImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = QwenImagePipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + supports_dduf = False + test_xformers_attention = False + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = QwenImageTransformer2DModel( + patch_size=2, + in_channels=16, + out_channels=4, + num_layers=2, + attention_head_dim=16, + num_attention_heads=3, + joint_attention_dim=16, + guidance_embeds=False, + axes_dims_rope=(8, 4, 4), + ) + + torch.manual_seed(0) + z_dim = 4 + vae = AutoencoderKLQwenImage( + base_dim=z_dim * 6, + z_dim=z_dim, + dim_mult=[1, 2, 4], + num_res_blocks=1, + temperal_downsample=[False, True], + # fmt: off + latents_mean=[0.0] * 4, + latents_std=[1.0] * 4, + # fmt: on + ) + + torch.manual_seed(0) + scheduler = FlowMatchEulerDiscreteScheduler() + + torch.manual_seed(0) + config = Qwen2_5_VLConfig( + text_config={ + "hidden_size": 16, + "intermediate_size": 16, + "num_hidden_layers": 2, + "num_attention_heads": 2, + "num_key_value_heads": 2, + "rope_scaling": { + "mrope_section": [1, 1, 2], + "rope_type": "default", + "type": "default", + }, + "rope_theta": 1000000.0, + }, + vision_config={ + "depth": 2, + "hidden_size": 16, + "intermediate_size": 16, + "num_heads": 2, + "out_hidden_size": 16, + }, + hidden_size=16, + vocab_size=152064, + vision_end_token_id=151653, + vision_start_token_id=151652, + vision_token_id=151654, + ) + text_encoder = 
Qwen2_5_VLForConditionalGeneration(config) + tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration") + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 3.0, + "true_cfg_scale": 1.0, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "pt", + } + + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + generated_image = image[0] + self.assertEqual(generated_image.shape, (3, 32, 32)) + + # fmt: off + expected_slice = torch.tensor([0.56331, 0.63677, 0.6015, 0.56369, 0.58166, 0.55277, 0.57176, 0.63261, 0.41466, 0.35561, 0.56229, 0.48334, 0.49714, 0.52622, 0.40872, 0.50208]) + # fmt: on + + generated_slice = generated_image.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3)) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1) + + def test_attention_slicing_forward_pass( + self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3 + ): + if not self.test_attention_slicing: + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + output_without_slicing = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=1) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing1 = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=2) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing2 = pipe(**inputs)[0] + + if test_max_difference: + max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max() + max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max() + self.assertLess( + max(max_diff1, max_diff2), + expected_max_diff, + "Attention slicing should not affect the inference results", + ) + + def test_vae_tiling(self, expected_diff_max: float = 0.2): + generator_device = "cpu" + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe.to("cpu") + pipe.set_progress_bar_config(disable=None) + + # Without tiling + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_without_tiling = pipe(**inputs)[0] + + # With tiling + pipe.vae.enable_tiling( + tile_sample_min_height=96, + tile_sample_min_width=96, + tile_sample_stride_height=64, + tile_sample_stride_width=64, + ) + inputs = 
self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_with_tiling = pipe(**inputs)[0] + + self.assertLess( + (to_np(output_without_tiling) - to_np(output_with_tiling)).max(), + expected_diff_max, + "VAE tiling should not affect the inference results", + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_controlnet.py b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_controlnet.py new file mode 100644 index 0000000000000000000000000000000000000000..c78e5cb233d3b9f9d1401e9e830c7be0ea30e292 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_controlnet.py @@ -0,0 +1,339 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch +from transformers import Qwen2_5_VLConfig, Qwen2_5_VLForConditionalGeneration, Qwen2Tokenizer + +from diffusers import ( + AutoencoderKLQwenImage, + FlowMatchEulerDiscreteScheduler, + QwenImageControlNetModel, + QwenImageControlNetPipeline, + QwenImageMultiControlNetModel, + QwenImageTransformer2DModel, +) +from diffusers.utils.testing_utils import enable_full_determinism, torch_device +from diffusers.utils.torch_utils import randn_tensor + +from ..pipeline_params import TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class QwenControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = QwenImageControlNetPipeline + params = (TEXT_TO_IMAGE_PARAMS | frozenset(["control_image", "controlnet_conditioning_scale"])) - { + "cross_attention_kwargs" + } + batch_params = frozenset(["prompt", "negative_prompt", "control_image"]) + image_params = frozenset(["control_image"]) + image_latents_params = frozenset(["latents"]) + + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "control_image", + "controlnet_conditioning_scale", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + + supports_dduf = False + test_xformers_attention = True + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = QwenImageTransformer2DModel( + patch_size=2, + in_channels=16, + out_channels=4, + num_layers=2, + attention_head_dim=16, + num_attention_heads=3, + joint_attention_dim=16, + guidance_embeds=False, + axes_dims_rope=(8, 4, 4), + ) + + torch.manual_seed(0) + controlnet = QwenImageControlNetModel( + patch_size=2, + in_channels=16, + out_channels=4, + num_layers=2, + attention_head_dim=16, + num_attention_heads=3, + joint_attention_dim=16, + axes_dims_rope=(8, 4, 4), + ) + + torch.manual_seed(0) + z_dim = 4 + vae = AutoencoderKLQwenImage( + base_dim=z_dim * 6, + z_dim=z_dim, + dim_mult=[1, 2, 4], + num_res_blocks=1, + temperal_downsample=[False, True], + latents_mean=[0.0] * z_dim, + latents_std=[1.0] * z_dim, + ) + + 
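# Reseed before constructing each component so its random init is
+ # reproducible regardless of how much RNG state earlier components consumed.
+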
torch.manual_seed(0) + scheduler = FlowMatchEulerDiscreteScheduler() + + torch.manual_seed(0) + config = Qwen2_5_VLConfig( + text_config={ + "hidden_size": 16, + "intermediate_size": 16, + "num_hidden_layers": 2, + "num_attention_heads": 2, + "num_key_value_heads": 2, + "rope_scaling": { + "mrope_section": [1, 1, 2], + "rope_type": "default", + "type": "default", + }, + "rope_theta": 1_000_000.0, + }, + vision_config={ + "depth": 2, + "hidden_size": 16, + "intermediate_size": 16, + "num_heads": 2, + "out_hidden_size": 16, + }, + hidden_size=16, + vocab_size=152064, + vision_end_token_id=151653, + vision_start_token_id=151652, + vision_token_id=151654, + ) + + text_encoder = Qwen2_5_VLForConditionalGeneration(config) + tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration") + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "controlnet": controlnet, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + control_image = randn_tensor( + (1, 3, 32, 32), + generator=generator, + device=torch.device(device), + dtype=torch.float32, + ) + + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 3.0, + "true_cfg_scale": 1.0, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "control_image": control_image, + "controlnet_conditioning_scale": 0.5, + "output_type": "pt", + } + + return inputs + + def test_qwen_controlnet(self): + device = "cpu" + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + generated_image = image[0] + self.assertEqual(generated_image.shape, (3, 32, 32)) + + # Expected slice from the generated image + expected_slice = torch.tensor( + [ + 0.4726, + 0.5549, + 0.6324, + 0.6548, + 0.4968, + 0.4639, + 0.4749, + 0.4898, + 0.4725, + 0.4645, + 0.4435, + 0.3339, + 0.3400, + 0.4630, + 0.3879, + 0.4406, + ] + ) + + generated_slice = generated_image.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3)) + + def test_qwen_controlnet_multicondition(self): + device = "cpu" + components = self.get_dummy_components() + + components["controlnet"] = QwenImageMultiControlNetModel([components["controlnet"]]) + + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + control_image = inputs["control_image"] + inputs["control_image"] = [control_image, control_image] + inputs["controlnet_conditioning_scale"] = [0.5, 0.5] + + image = pipe(**inputs).images + generated_image = image[0] + self.assertEqual(generated_image.shape, (3, 32, 32)) + # Expected slice from the generated image + expected_slice = torch.tensor( + [ + 0.6239, + 0.6642, + 0.5768, + 0.6039, + 0.5270, + 0.5070, + 0.5006, + 0.5271, + 0.4506, + 0.3085, + 0.3435, + 0.5152, + 0.5096, + 0.5422, + 0.4286, + 0.5752, + ] + ) + + generated_slice = generated_image.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + 
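# Spot-check a 16-value fingerprint (first and last 8 output values)
+ # against the stored reference slice.
+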
self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3)) + + def test_attention_slicing_forward_pass( + self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3 + ): + if not self.test_attention_slicing: + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + output_without_slicing = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=1) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing1 = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=2) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing2 = pipe(**inputs)[0] + + if test_max_difference: + max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max() + max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max() + self.assertLess( + max(max_diff1, max_diff2), + expected_max_diff, + "Attention slicing should not affect the inference results", + ) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1) + + def test_vae_tiling(self, expected_diff_max: float = 0.2): + generator_device = "cpu" + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe.to("cpu") + pipe.set_progress_bar_config(disable=None) + + # Without tiling + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + inputs["control_image"] = randn_tensor( + (1, 3, 128, 128), + generator=inputs["generator"], + device=torch.device(generator_device), + dtype=torch.float32, + ) + output_without_tiling = pipe(**inputs)[0] + + # With tiling + pipe.vae.enable_tiling( + tile_sample_min_height=96, + tile_sample_min_width=96, + tile_sample_stride_height=64, + tile_sample_stride_width=64, + ) + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + inputs["control_image"] = randn_tensor( + (1, 3, 128, 128), + generator=inputs["generator"], + device=torch.device(generator_device), + dtype=torch.float32, + ) + output_with_tiling = pipe(**inputs)[0] + + self.assertLess( + (to_np(output_without_tiling) - to_np(output_with_tiling)).max(), + expected_diff_max, + "VAE tiling should not affect the inference results", + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_edit.py b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_edit.py new file mode 100644 index 0000000000000000000000000000000000000000..058548cf5f1b8ea7dcfb3e8eaa1758c439a7ba34 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_edit.py @@ -0,0 +1,243 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import pytest +import torch +from PIL import Image +from transformers import Qwen2_5_VLConfig, Qwen2_5_VLForConditionalGeneration, Qwen2Tokenizer, Qwen2VLProcessor + +from diffusers import ( + AutoencoderKLQwenImage, + FlowMatchEulerDiscreteScheduler, + QwenImageEditPipeline, + QwenImageTransformer2DModel, +) + +from ...testing_utils import enable_full_determinism, torch_device +from ..pipeline_params import TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class QwenImageEditPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = QwenImageEditPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = frozenset(["prompt", "image"]) + image_params = frozenset(["image"]) + image_latents_params = frozenset(["latents"]) + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + supports_dduf = False + test_xformers_attention = False + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + tiny_ckpt_id = "hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration" + + torch.manual_seed(0) + transformer = QwenImageTransformer2DModel( + patch_size=2, + in_channels=16, + out_channels=4, + num_layers=2, + attention_head_dim=16, + num_attention_heads=3, + joint_attention_dim=16, + guidance_embeds=False, + axes_dims_rope=(8, 4, 4), + ) + + torch.manual_seed(0) + z_dim = 4 + vae = AutoencoderKLQwenImage( + base_dim=z_dim * 6, + z_dim=z_dim, + dim_mult=[1, 2, 4], + num_res_blocks=1, + temperal_downsample=[False, True], + latents_mean=[0.0] * z_dim, + latents_std=[1.0] * z_dim, + ) + + torch.manual_seed(0) + scheduler = FlowMatchEulerDiscreteScheduler() + + torch.manual_seed(0) + config = Qwen2_5_VLConfig( + text_config={ + "hidden_size": 16, + "intermediate_size": 16, + "num_hidden_layers": 2, + "num_attention_heads": 2, + "num_key_value_heads": 2, + "rope_scaling": { + "mrope_section": [1, 1, 2], + "rope_type": "default", + "type": "default", + }, + "rope_theta": 1000000.0, + }, + vision_config={ + "depth": 2, + "hidden_size": 16, + "intermediate_size": 16, + "num_heads": 2, + "out_hidden_size": 16, + }, + hidden_size=16, + vocab_size=152064, + vision_end_token_id=151653, + vision_start_token_id=151652, + vision_token_id=151654, + ) + text_encoder = Qwen2_5_VLForConditionalGeneration(config) + tokenizer = Qwen2Tokenizer.from_pretrained(tiny_ckpt_id) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "processor": Qwen2VLProcessor.from_pretrained(tiny_ckpt_id), + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + inputs = { + "prompt": "dance monkey", + "image": Image.new("RGB", (32, 32)), + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "true_cfg_scale": 1.0, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "pt", + } + + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe 
= self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + generated_image = image[0] + self.assertEqual(generated_image.shape, (3, 32, 32)) + + # fmt: off + expected_slice = torch.tensor([[0.5637, 0.6341, 0.6001, 0.5620, 0.5794, 0.5498, 0.5757, 0.6389, 0.4174, 0.3597, 0.5649, 0.4894, 0.4969, 0.5255, 0.4083, 0.4986]]) + # fmt: on + + generated_slice = generated_image.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3)) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1) + + def test_attention_slicing_forward_pass( + self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3 + ): + if not self.test_attention_slicing: + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + output_without_slicing = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=1) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing1 = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=2) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing2 = pipe(**inputs)[0] + + if test_max_difference: + max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max() + max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max() + self.assertLess( + max(max_diff1, max_diff2), + expected_max_diff, + "Attention slicing should not affect the inference results", + ) + + def test_vae_tiling(self, expected_diff_max: float = 0.2): + generator_device = "cpu" + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe.to("cpu") + pipe.set_progress_bar_config(disable=None) + + # Without tiling + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_without_tiling = pipe(**inputs)[0] + + # With tiling + pipe.vae.enable_tiling( + tile_sample_min_height=96, + tile_sample_min_width=96, + tile_sample_stride_height=64, + tile_sample_stride_width=64, + ) + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_with_tiling = pipe(**inputs)[0] + + self.assertLess( + (to_np(output_without_tiling) - to_np(output_with_tiling)).max(), + expected_diff_max, + "VAE tiling should not affect the inference results", + ) + + @pytest.mark.xfail(condition=True, reason="Preconfigured embeddings need to be revisited.", strict=True) + def test_encode_prompt_works_in_isolation(self, extra_required_param_value_dict=None, atol=1e-4, rtol=1e-4): + super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict, atol, rtol) diff --git a/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_img2img.py b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..07e683ec7f5a6dde2c3f2ed34de079cd7a49efd6 --- /dev/null +++ 
b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_img2img.py
@@ -0,0 +1,218 @@
+import random
+import unittest
+
+import numpy as np
+import torch
+from transformers import Qwen2_5_VLConfig, Qwen2_5_VLForConditionalGeneration, Qwen2Tokenizer
+
+from diffusers import (
+    AutoencoderKLQwenImage,
+    FlowMatchEulerDiscreteScheduler,
+    QwenImageImg2ImgPipeline,
+    QwenImageTransformer2DModel,
+)
+
+from ...testing_utils import (
+    enable_full_determinism,
+    floats_tensor,
+    torch_device,
+)
+from ..test_pipelines_common import PipelineTesterMixin, to_np
+
+
+enable_full_determinism()
+
+
+# Note: PipelineTesterMixin must come before unittest.TestCase in the MRO
+# (as in every other suite in this patch) so the mixin's setUp/tearDown
+# cleanup is not shadowed by TestCase's no-op defaults.
+class QwenImageImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
+    pipeline_class = QwenImageImg2ImgPipeline
+    params = frozenset(["prompt", "image", "height", "width", "guidance_scale", "true_cfg_scale", "strength"])
+    batch_params = frozenset(["prompt", "image"])
+    image_params = frozenset(["image"])
+    image_latents_params = frozenset(["latents"])
+    required_optional_params = frozenset(
+        [
+            "num_inference_steps",
+            "generator",
+            "latents",
+            "return_dict",
+            "callback_on_step_end",
+            "callback_on_step_end_tensor_inputs",
+        ]
+    )
+    supports_dduf = False
+    test_xformers_attention = False
+    test_attention_slicing = True
+    test_layerwise_casting = True
+    test_group_offloading = True
+
+    def get_dummy_components(self):
+        torch.manual_seed(0)
+        transformer = QwenImageTransformer2DModel(
+            patch_size=2,
+            in_channels=16,
+            out_channels=4,
+            num_layers=2,
+            attention_head_dim=16,
+            num_attention_heads=3,
+            joint_attention_dim=16,
+            guidance_embeds=False,
+            axes_dims_rope=(8, 4, 4),
+        )
+
+        torch.manual_seed(0)
+        z_dim = 4
+        vae = AutoencoderKLQwenImage(
+            base_dim=z_dim * 6,
+            z_dim=z_dim,
+            dim_mult=[1, 2, 4],
+            num_res_blocks=1,
+            temperal_downsample=[False, True],
+            latents_mean=[0.0] * 4,
+            latents_std=[1.0] * 4,
+        )
+
+        torch.manual_seed(0)
+        scheduler = FlowMatchEulerDiscreteScheduler()
+
+        torch.manual_seed(0)
+        config = Qwen2_5_VLConfig(
+            text_config={
+                "hidden_size": 16,
+                "intermediate_size": 16,
+                "num_hidden_layers": 2,
+                "num_attention_heads": 2,
+                "num_key_value_heads": 2,
+                "rope_scaling": {
+                    "mrope_section": [1, 1, 2],
+                    "rope_type": "default",
+                    "type": "default",
+                },
+                "rope_theta": 1000000.0,
+            },
+            vision_config={
+                "depth": 2,
+                "hidden_size": 16,
+                "intermediate_size": 16,
+                "num_heads": 2,
+                "out_hidden_size": 16,
+            },
+            hidden_size=16,
+            vocab_size=152064,
+            vision_end_token_id=151653,
+            vision_start_token_id=151652,
+            vision_token_id=151654,
+        )
+        text_encoder = Qwen2_5_VLForConditionalGeneration(config)
+        tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration")
+
+        return {
+            "transformer": transformer,
+            "vae": vae,
+            "scheduler": scheduler,
+            "text_encoder": text_encoder,
+            "tokenizer": tokenizer,
+        }
+
+    def get_dummy_inputs(self, device, seed=0):
+        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
+        if str(device).startswith("mps"):
+            generator = torch.manual_seed(seed)
+        else:
+            generator = torch.Generator(device="cpu").manual_seed(seed)
+
+        inputs = {
+            "image": image,
+            "prompt": "dance monkey",
+            "negative_prompt": "bad quality",
+            "generator": generator,
+            "num_inference_steps": 2,
+            "guidance_scale": 3.0,
+            "true_cfg_scale": 1.0,
+            "height": 32,
+            "width": 32,
+            "max_sequence_length": 16,
+            "output_type": "pt",
+        }
+
+        return inputs
+
+    def test_inference(self):
+        device = "cpu"
+
+        components = self.get_dummy_components()
+        pipe = 
self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + generated_image = image[0] + self.assertEqual(generated_image.shape, (3, 32, 32)) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1) + + def test_attention_slicing_forward_pass( + self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3 + ): + if not self.test_attention_slicing: + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + output_without_slicing = pipe(**inputs).images[0] + + pipe.enable_attention_slicing(slice_size=1) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing1 = pipe(**inputs).images[0] + + pipe.enable_attention_slicing(slice_size=2) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing2 = pipe(**inputs).images[0] + + if test_max_difference: + max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max() + max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max() + self.assertLess( + max(max_diff1, max_diff2), + expected_max_diff, + "Attention slicing should not affect the inference results", + ) + + def test_vae_tiling(self, expected_diff_max: float = 0.2): + generator_device = "cpu" + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe.to("cpu") + pipe.set_progress_bar_config(disable=None) + + # Without tiling + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_without_tiling = pipe(**inputs)[0] + + # With tiling + pipe.vae.enable_tiling( + tile_sample_min_height=96, + tile_sample_min_width=96, + tile_sample_stride_height=64, + tile_sample_stride_width=64, + ) + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_with_tiling = pipe(**inputs)[0] + + self.assertLess( + (to_np(output_without_tiling) - to_np(output_with_tiling)).max(), + expected_diff_max, + "VAE tiling should not affect the inference results", + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_inpaint.py b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..b564624540c3eafd81f84e501ad590246328cc92 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/qwenimage/test_qwenimage_inpaint.py @@ -0,0 +1,233 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
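+
+# Rough shape of what this suite feeds the pipeline (illustrative only; the
+# concrete values come from `get_dummy_inputs` below):
+#
+#   image = floats_tensor((1, 3, 32, 32))    # random RGB source image
+#   mask_image = torch.ones((1, 1, 32, 32))  # all-ones mask: repaint everything
+#   QwenImageInpaintPipeline(**components)(image=image, mask_image=mask_image, ...)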
+ +import random +import unittest + +import numpy as np +import torch +from transformers import Qwen2_5_VLConfig, Qwen2_5_VLForConditionalGeneration, Qwen2Tokenizer + +from diffusers import ( + AutoencoderKLQwenImage, + FlowMatchEulerDiscreteScheduler, + QwenImageInpaintPipeline, + QwenImageTransformer2DModel, +) + +from ...testing_utils import enable_full_determinism, floats_tensor, torch_device +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class QwenImageInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = QwenImageInpaintPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + supports_dduf = False + test_xformers_attention = False + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = QwenImageTransformer2DModel( + patch_size=2, + in_channels=16, + out_channels=4, + num_layers=2, + attention_head_dim=16, + num_attention_heads=3, + joint_attention_dim=16, + guidance_embeds=False, + axes_dims_rope=(8, 4, 4), + ) + + torch.manual_seed(0) + z_dim = 4 + vae = AutoencoderKLQwenImage( + base_dim=z_dim * 6, + z_dim=z_dim, + dim_mult=[1, 2, 4], + num_res_blocks=1, + temperal_downsample=[False, True], + # fmt: off + latents_mean=[0.0] * 4, + latents_std=[1.0] * 4, + # fmt: on + ) + + torch.manual_seed(0) + scheduler = FlowMatchEulerDiscreteScheduler() + + torch.manual_seed(0) + config = Qwen2_5_VLConfig( + text_config={ + "hidden_size": 16, + "intermediate_size": 16, + "num_hidden_layers": 2, + "num_attention_heads": 2, + "num_key_value_heads": 2, + "rope_scaling": { + "mrope_section": [1, 1, 2], + "rope_type": "default", + "type": "default", + }, + "rope_theta": 1000000.0, + }, + vision_config={ + "depth": 2, + "hidden_size": 16, + "intermediate_size": 16, + "num_heads": 2, + "out_hidden_size": 16, + }, + hidden_size=16, + vocab_size=152064, + vision_end_token_id=151653, + vision_start_token_id=151652, + vision_token_id=151654, + ) + text_encoder = Qwen2_5_VLForConditionalGeneration(config) + tokenizer = Qwen2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration") + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + mask_image = torch.ones((1, 1, 32, 32)).to(device) + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "image": image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 3.0, + "true_cfg_scale": 1.0, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "pt", + } + + return inputs + + def test_inference(self): + device = "cpu" + + components = 
self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + generated_image = image[0] + self.assertEqual(generated_image.shape, (3, 32, 32)) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-1) + + def test_attention_slicing_forward_pass( + self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3 + ): + if not self.test_attention_slicing: + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + output_without_slicing = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=1) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing1 = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=2) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing2 = pipe(**inputs)[0] + + if test_max_difference: + max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max() + max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max() + self.assertLess( + max(max_diff1, max_diff2), + expected_max_diff, + "Attention slicing should not affect the inference results", + ) + + def test_vae_tiling(self, expected_diff_max: float = 0.2): + generator_device = "cpu" + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe.to("cpu") + pipe.set_progress_bar_config(disable=None) + + # Without tiling + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_without_tiling = pipe(**inputs)[0] + + # With tiling + pipe.vae.enable_tiling( + tile_sample_min_height=96, + tile_sample_min_width=96, + tile_sample_stride_height=64, + tile_sample_stride_width=64, + ) + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_with_tiling = pipe(**inputs)[0] + + self.assertLess( + (to_np(output_without_tiling) - to_np(output_with_tiling)).max(), + expected_diff_max, + "VAE tiling should not affect the inference results", + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/sana/__init__.py b/pythonProject/diffusers-main/tests/pipelines/sana/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/sana/test_sana.py b/pythonProject/diffusers-main/tests/pipelines/sana/test_sana.py new file mode 100644 index 0000000000000000000000000000000000000000..34ea3079b1431322697f1c3ff1c803e957fc09f8 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/sana/test_sana.py @@ -0,0 +1,373 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import inspect +import unittest + +import numpy as np +import torch +from transformers import Gemma2Config, Gemma2Model, GemmaTokenizer + +from diffusers import AutoencoderDC, FlowMatchEulerDiscreteScheduler, SanaPipeline, SanaTransformer2DModel + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class SanaPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SanaPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = SanaTransformer2DModel( + patch_size=1, + in_channels=4, + out_channels=4, + num_layers=1, + num_attention_heads=2, + attention_head_dim=4, + num_cross_attention_heads=2, + cross_attention_head_dim=4, + cross_attention_dim=8, + caption_channels=8, + sample_size=32, + ) + + torch.manual_seed(0) + vae = AutoencoderDC( + in_channels=3, + latent_channels=4, + attention_head_dim=2, + encoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + decoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + encoder_block_out_channels=(8, 8), + decoder_block_out_channels=(8, 8), + encoder_qkv_multiscales=((), (5,)), + decoder_qkv_multiscales=((), (5,)), + encoder_layers_per_block=(1, 1), + decoder_layers_per_block=[1, 1], + downsample_block_type="conv", + upsample_block_type="interpolate", + decoder_norm_types="rms_norm", + decoder_act_fns="silu", + scaling_factor=0.41407, + ) + + torch.manual_seed(0) + scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) + + torch.manual_seed(0) + text_encoder_config = Gemma2Config( + head_dim=16, + hidden_size=8, + initializer_range=0.02, + intermediate_size=64, + max_position_embeddings=8192, + model_type="gemma2", + num_attention_heads=2, + num_hidden_layers=1, + num_key_value_heads=2, + vocab_size=8, + attn_implementation="eager", + ) + text_encoder = Gemma2Model(text_encoder_config) + tokenizer = GemmaTokenizer.from_pretrained("hf-internal-testing/dummy-gemma") + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "", + "negative_prompt": "", + "generator": 
generator,
+            "num_inference_steps": 2,
+            "guidance_scale": 6.0,
+            "height": 32,
+            "width": 32,
+            "max_sequence_length": 16,
+            "output_type": "pt",
+            "complex_human_instruction": None,
+        }
+        return inputs
+
+    def test_inference(self):
+        device = "cpu"
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe.to(device)
+        pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = pipe(**inputs)[0]
+        generated_image = image[0]
+
+        self.assertEqual(generated_image.shape, (3, 32, 32))
+        expected_image = torch.randn(3, 32, 32)
+        max_diff = np.abs(generated_image - expected_image).max()
+        self.assertLessEqual(max_diff, 1e10)
+
+    def test_callback_inputs(self):
+        sig = inspect.signature(self.pipeline_class.__call__)
+        has_callback_tensor_inputs = "callback_on_step_end_tensor_inputs" in sig.parameters
+        has_callback_step_end = "callback_on_step_end" in sig.parameters
+
+        if not (has_callback_tensor_inputs and has_callback_step_end):
+            return
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        self.assertTrue(
+            hasattr(pipe, "_callback_tensor_inputs"),
+            f"{self.pipeline_class} should have `_callback_tensor_inputs` that defines a list of tensor variables its callback function can use as inputs",
+        )
+
+        def callback_inputs_subset(pipe, i, t, callback_kwargs):
+            # iterate over callback args
+            for tensor_name, tensor_value in callback_kwargs.items():
+                # check that we're only passing in allowed tensor inputs
+                assert tensor_name in pipe._callback_tensor_inputs
+
+            return callback_kwargs
+
+        def callback_inputs_all(pipe, i, t, callback_kwargs):
+            for tensor_name in pipe._callback_tensor_inputs:
+                assert tensor_name in callback_kwargs
+
+            # iterate over callback args
+            for tensor_name, tensor_value in callback_kwargs.items():
+                # check that we're only passing in allowed tensor inputs
+                assert tensor_name in pipe._callback_tensor_inputs
+
+            return callback_kwargs
+
+        inputs = self.get_dummy_inputs(torch_device)
+
+        # Test passing in a subset
+        inputs["callback_on_step_end"] = callback_inputs_subset
+        inputs["callback_on_step_end_tensor_inputs"] = ["latents"]
+        output = pipe(**inputs)[0]
+
+        # Test passing in everything
+        inputs["callback_on_step_end"] = callback_inputs_all
+        inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs
+        output = pipe(**inputs)[0]
+
+        def callback_inputs_change_tensor(pipe, i, t, callback_kwargs):
+            is_last = i == (pipe.num_timesteps - 1)
+            if is_last:
+                callback_kwargs["latents"] = torch.zeros_like(callback_kwargs["latents"])
+            return callback_kwargs
+
+        inputs["callback_on_step_end"] = callback_inputs_change_tensor
+        inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs
+        output = pipe(**inputs)[0]
+        assert output.abs().sum() < 1e10
+
+    def test_attention_slicing_forward_pass(
+        self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3
+    ):
+        if not self.test_attention_slicing:
+            return
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        generator_device = "cpu"
+        inputs = self.get_dummy_inputs(generator_device)
+        output_without_slicing = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=1)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing1 = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=2)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing2 = pipe(**inputs)[0]
+
+        if test_max_difference:
+            max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max()
+            max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max()
+            self.assertLess(
+                max(max_diff1, max_diff2),
+                expected_max_diff,
+                "Attention slicing should not affect the inference results",
+            )
+
+    def test_vae_tiling(self, expected_diff_max: float = 0.2):
+        generator_device = "cpu"
+        components = self.get_dummy_components()
+
+        pipe = self.pipeline_class(**components)
+        pipe.to("cpu")
+        pipe.set_progress_bar_config(disable=None)
+
+        # Without tiling
+        inputs = self.get_dummy_inputs(generator_device)
+        inputs["height"] = inputs["width"] = 128
+        output_without_tiling = pipe(**inputs)[0]
+
+        # With tiling
+        pipe.vae.enable_tiling(
+            tile_sample_min_height=96,
+            tile_sample_min_width=96,
+            tile_sample_stride_height=64,
+            tile_sample_stride_width=64,
+        )
+        inputs = self.get_dummy_inputs(generator_device)
+        inputs["height"] = inputs["width"] = 128
+        output_with_tiling = pipe(**inputs)[0]
+
+        self.assertLess(
+            (to_np(output_without_tiling) - to_np(output_with_tiling)).max(),
+            expected_diff_max,
+            "VAE tiling should not affect the inference results",
+        )
+
+    # TODO(aryan): Create a dummy gemma model with smol vocab size
+    @unittest.skip(
+        "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."
+    )
+    def test_inference_batch_consistent(self):
+        pass
+
+    @unittest.skip(
+        "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."
+    )
+    def test_inference_batch_single_identical(self):
+        pass
+
+    def test_float16_inference(self):
+        # Requires a higher tolerance, as the model seems very sensitive to dtype
+        super().test_float16_inference(expected_max_diff=0.08)
+
+
+@slow
+@require_torch_accelerator
+class SanaPipelineIntegrationTests(unittest.TestCase):
+    prompt = "A painting of a squirrel eating a burger."
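+
+    # The expected_slice values further down are first/last-16-pixel
+    # fingerprints of the generated image, compared with atol=1e-4; they are
+    # tied to the hardware/dtype configuration they were recorded on, so some
+    # drift is possible on other accelerators.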
+ + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_sana_1024(self): + generator = torch.Generator("cpu").manual_seed(0) + + pipe = SanaPipeline.from_pretrained( + "Efficient-Large-Model/Sana_1600M_1024px_diffusers", torch_dtype=torch.float16 + ) + pipe.enable_model_cpu_offload(device=torch_device) + + image = pipe( + prompt=self.prompt, + height=1024, + width=1024, + generator=generator, + num_inference_steps=20, + output_type="np", + ).images[0] + + image = image.flatten() + output_slice = np.concatenate((image[:16], image[-16:])) + + # fmt: off + expected_slice = np.array([0.0427, 0.0789, 0.0662, 0.0464, 0.082, 0.0574, 0.0535, 0.0886, 0.0647, 0.0549, 0.0872, 0.0605, 0.0593, 0.0942, 0.0674, 0.0581, 0.0076, 0.0168, 0.0027, 0.0063, 0.0159, 0.0, 0.0071, 0.0198, 0.0034, 0.0105, 0.0212, 0.0, 0.0, 0.0166, 0.0042, 0.0125]) + # fmt: on + + self.assertTrue(np.allclose(output_slice, expected_slice, atol=1e-4)) + + def test_sana_512(self): + generator = torch.Generator("cpu").manual_seed(0) + + pipe = SanaPipeline.from_pretrained( + "Efficient-Large-Model/Sana_1600M_512px_diffusers", torch_dtype=torch.float16 + ) + pipe.enable_model_cpu_offload(device=torch_device) + + image = pipe( + prompt=self.prompt, + height=512, + width=512, + generator=generator, + num_inference_steps=20, + output_type="np", + ).images[0] + + image = image.flatten() + output_slice = np.concatenate((image[:16], image[-16:])) + + # fmt: off + expected_slice = np.array([0.0803, 0.0774, 0.1108, 0.0872, 0.093, 0.1118, 0.0952, 0.0898, 0.1038, 0.0818, 0.0754, 0.0894, 0.074, 0.0691, 0.0906, 0.0671, 0.0154, 0.0254, 0.0203, 0.0178, 0.0283, 0.0193, 0.0215, 0.0273, 0.0188, 0.0212, 0.0273, 0.0151, 0.0061, 0.0244, 0.0212, 0.0259]) + # fmt: on + + self.assertTrue(np.allclose(output_slice, expected_slice, atol=1e-4)) diff --git a/pythonProject/diffusers-main/tests/pipelines/sana/test_sana_controlnet.py b/pythonProject/diffusers-main/tests/pipelines/sana/test_sana_controlnet.py new file mode 100644 index 0000000000000000000000000000000000000000..043e276fcb844741c0d18744db1347a1162b373e --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/sana/test_sana_controlnet.py @@ -0,0 +1,328 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
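+
+# Same fast-test skeleton as test_sana.py; the visible differences are the
+# extra `controlnet` component passed to the pipeline and the `control_image`
+# / `controlnet_conditioning_scale` entries added in get_dummy_inputs().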
+ +import inspect +import unittest + +import numpy as np +import torch +from transformers import Gemma2Config, Gemma2Model, GemmaTokenizer + +from diffusers import ( + AutoencoderDC, + FlowMatchEulerDiscreteScheduler, + SanaControlNetModel, + SanaControlNetPipeline, + SanaTransformer2DModel, +) +from diffusers.utils.torch_utils import randn_tensor + +from ...testing_utils import ( + enable_full_determinism, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class SanaControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SanaControlNetPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + controlnet = SanaControlNetModel( + patch_size=1, + in_channels=4, + out_channels=4, + num_layers=1, + num_attention_heads=2, + attention_head_dim=4, + num_cross_attention_heads=2, + cross_attention_head_dim=4, + cross_attention_dim=8, + caption_channels=8, + sample_size=32, + ) + + torch.manual_seed(0) + transformer = SanaTransformer2DModel( + patch_size=1, + in_channels=4, + out_channels=4, + num_layers=1, + num_attention_heads=2, + attention_head_dim=4, + num_cross_attention_heads=2, + cross_attention_head_dim=4, + cross_attention_dim=8, + caption_channels=8, + sample_size=32, + ) + + torch.manual_seed(0) + vae = AutoencoderDC( + in_channels=3, + latent_channels=4, + attention_head_dim=2, + encoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + decoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + encoder_block_out_channels=(8, 8), + decoder_block_out_channels=(8, 8), + encoder_qkv_multiscales=((), (5,)), + decoder_qkv_multiscales=((), (5,)), + encoder_layers_per_block=(1, 1), + decoder_layers_per_block=[1, 1], + downsample_block_type="conv", + upsample_block_type="interpolate", + decoder_norm_types="rms_norm", + decoder_act_fns="silu", + scaling_factor=0.41407, + ) + + torch.manual_seed(0) + scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) + + torch.manual_seed(0) + text_encoder_config = Gemma2Config( + head_dim=16, + hidden_size=8, + initializer_range=0.02, + intermediate_size=64, + max_position_embeddings=8192, + model_type="gemma2", + num_attention_heads=2, + num_hidden_layers=1, + num_key_value_heads=2, + vocab_size=8, + attn_implementation="eager", + ) + text_encoder = Gemma2Model(text_encoder_config) + tokenizer = GemmaTokenizer.from_pretrained("hf-internal-testing/dummy-gemma") + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "controlnet": controlnet, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + control_image = randn_tensor((1, 3, 32, 32), generator=generator, device=device) + inputs = { + "prompt": 
"", + "negative_prompt": "", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "pt", + "complex_human_instruction": None, + "control_image": control_image, + "controlnet_conditioning_scale": 1.0, + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs)[0] + generated_image = image[0] + + self.assertEqual(generated_image.shape, (3, 32, 32)) + expected_image = torch.randn(3, 32, 32) + max_diff = np.abs(generated_image - expected_image).max() + self.assertLessEqual(max_diff, 1e10) + + def test_callback_inputs(self): + sig = inspect.signature(self.pipeline_class.__call__) + has_callback_tensor_inputs = "callback_on_step_end_tensor_inputs" in sig.parameters + has_callback_step_end = "callback_on_step_end" in sig.parameters + + if not (has_callback_tensor_inputs and has_callback_step_end): + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + self.assertTrue( + hasattr(pipe, "_callback_tensor_inputs"), + f" {self.pipeline_class} should have `_callback_tensor_inputs` that defines a list of tensor variables its callback function can use as inputs", + ) + + def callback_inputs_subset(pipe, i, t, callback_kwargs): + # iterate over callback args + for tensor_name, tensor_value in callback_kwargs.items(): + # check that we're only passing in allowed tensor inputs + assert tensor_name in pipe._callback_tensor_inputs + + return callback_kwargs + + def callback_inputs_all(pipe, i, t, callback_kwargs): + for tensor_name in pipe._callback_tensor_inputs: + assert tensor_name in callback_kwargs + + # iterate over callback args + for tensor_name, tensor_value in callback_kwargs.items(): + # check that we're only passing in allowed tensor inputs + assert tensor_name in pipe._callback_tensor_inputs + + return callback_kwargs + + inputs = self.get_dummy_inputs(torch_device) + + # Test passing in a subset + inputs["callback_on_step_end"] = callback_inputs_subset + inputs["callback_on_step_end_tensor_inputs"] = ["latents"] + output = pipe(**inputs)[0] + + # Test passing in a everything + inputs["callback_on_step_end"] = callback_inputs_all + inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs + output = pipe(**inputs)[0] + + def callback_inputs_change_tensor(pipe, i, t, callback_kwargs): + is_last = i == (pipe.num_timesteps - 1) + if is_last: + callback_kwargs["latents"] = torch.zeros_like(callback_kwargs["latents"]) + return callback_kwargs + + inputs["callback_on_step_end"] = callback_inputs_change_tensor + inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs + output = pipe(**inputs)[0] + assert output.abs().sum() < 1e10 + + def test_attention_slicing_forward_pass( + self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3 + ): + if not self.test_attention_slicing: + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device 
= "cpu" + inputs = self.get_dummy_inputs(generator_device) + output_without_slicing = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=1) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing1 = pipe(**inputs)[0] + + pipe.enable_attention_slicing(slice_size=2) + inputs = self.get_dummy_inputs(generator_device) + output_with_slicing2 = pipe(**inputs)[0] + + if test_max_difference: + max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max() + max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max() + self.assertLess( + max(max_diff1, max_diff2), + expected_max_diff, + "Attention slicing should not affect the inference results", + ) + + def test_vae_tiling(self, expected_diff_max: float = 0.2): + generator_device = "cpu" + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe.to("cpu") + pipe.set_progress_bar_config(disable=None) + + # Without tiling + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_without_tiling = pipe(**inputs)[0] + + # With tiling + pipe.vae.enable_tiling( + tile_sample_min_height=96, + tile_sample_min_width=96, + tile_sample_stride_height=64, + tile_sample_stride_width=64, + ) + inputs = self.get_dummy_inputs(generator_device) + inputs["height"] = inputs["width"] = 128 + output_with_tiling = pipe(**inputs)[0] + + self.assertLess( + (to_np(output_without_tiling) - to_np(output_with_tiling)).max(), + expected_diff_max, + "VAE tiling should not affect the inference results", + ) + + # TODO(aryan): Create a dummy gemma model with smol vocab size + @unittest.skip( + "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error." + ) + def test_inference_batch_consistent(self): + pass + + @unittest.skip( + "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error." + ) + def test_inference_batch_single_identical(self): + pass + + def test_float16_inference(self): + # Requires higher tolerance as model seems very sensitive to dtype + super().test_float16_inference(expected_max_diff=0.08) diff --git a/pythonProject/diffusers-main/tests/pipelines/sana/test_sana_sprint.py b/pythonProject/diffusers-main/tests/pipelines/sana/test_sana_sprint.py new file mode 100644 index 0000000000000000000000000000000000000000..fee2304dce1b0a1e180c2c8a9a44ae2cdf8d5a20 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/sana/test_sana_sprint.py @@ -0,0 +1,302 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import inspect +import unittest + +import numpy as np +import torch +from transformers import Gemma2Config, Gemma2Model, GemmaTokenizer + +from diffusers import AutoencoderDC, SanaSprintPipeline, SanaTransformer2DModel, SCMScheduler + +from ...testing_utils import ( + enable_full_determinism, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class SanaSprintPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SanaSprintPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs", "negative_prompt", "negative_prompt_embeds"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS - {"negative_prompt"} + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS - {"negative_prompt"} + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = SanaTransformer2DModel( + patch_size=1, + in_channels=4, + out_channels=4, + num_layers=1, + num_attention_heads=2, + attention_head_dim=4, + num_cross_attention_heads=2, + cross_attention_head_dim=4, + cross_attention_dim=8, + caption_channels=8, + sample_size=32, + qk_norm="rms_norm_across_heads", + guidance_embeds=True, + ) + + torch.manual_seed(0) + vae = AutoencoderDC( + in_channels=3, + latent_channels=4, + attention_head_dim=2, + encoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + decoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + encoder_block_out_channels=(8, 8), + decoder_block_out_channels=(8, 8), + encoder_qkv_multiscales=((), (5,)), + decoder_qkv_multiscales=((), (5,)), + encoder_layers_per_block=(1, 1), + decoder_layers_per_block=[1, 1], + downsample_block_type="conv", + upsample_block_type="interpolate", + decoder_norm_types="rms_norm", + decoder_act_fns="silu", + scaling_factor=0.41407, + ) + + torch.manual_seed(0) + scheduler = SCMScheduler() + + torch.manual_seed(0) + text_encoder_config = Gemma2Config( + head_dim=16, + hidden_size=8, + initializer_range=0.02, + intermediate_size=64, + max_position_embeddings=8192, + model_type="gemma2", + num_attention_heads=2, + num_hidden_layers=1, + num_key_value_heads=2, + vocab_size=8, + attn_implementation="eager", + ) + text_encoder = Gemma2Model(text_encoder_config) + tokenizer = GemmaTokenizer.from_pretrained("hf-internal-testing/dummy-gemma") + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "pt", + "complex_human_instruction": None, + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) 
+
+        inputs = self.get_dummy_inputs(device)
+        image = pipe(**inputs)[0]
+        generated_image = image[0]
+
+        self.assertEqual(generated_image.shape, (3, 32, 32))
+        expected_image = torch.randn(3, 32, 32)
+        max_diff = np.abs(generated_image - expected_image).max()
+        self.assertLessEqual(max_diff, 1e10)
+
+    def test_callback_inputs(self):
+        sig = inspect.signature(self.pipeline_class.__call__)
+        has_callback_tensor_inputs = "callback_on_step_end_tensor_inputs" in sig.parameters
+        has_callback_step_end = "callback_on_step_end" in sig.parameters
+
+        if not (has_callback_tensor_inputs and has_callback_step_end):
+            return
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        self.assertTrue(
+            hasattr(pipe, "_callback_tensor_inputs"),
+            f"{self.pipeline_class} should have `_callback_tensor_inputs` that defines a list of tensor variables its callback function can use as inputs",
+        )
+
+        def callback_inputs_subset(pipe, i, t, callback_kwargs):
+            # iterate over callback args
+            for tensor_name, tensor_value in callback_kwargs.items():
+                # check that we're only passing in allowed tensor inputs
+                assert tensor_name in pipe._callback_tensor_inputs
+
+            return callback_kwargs
+
+        def callback_inputs_all(pipe, i, t, callback_kwargs):
+            for tensor_name in pipe._callback_tensor_inputs:
+                assert tensor_name in callback_kwargs
+
+            # iterate over callback args
+            for tensor_name, tensor_value in callback_kwargs.items():
+                # check that we're only passing in allowed tensor inputs
+                assert tensor_name in pipe._callback_tensor_inputs
+
+            return callback_kwargs
+
+        inputs = self.get_dummy_inputs(torch_device)
+
+        # Test passing in a subset
+        inputs["callback_on_step_end"] = callback_inputs_subset
+        inputs["callback_on_step_end_tensor_inputs"] = ["latents"]
+        output = pipe(**inputs)[0]
+
+        # Test passing in everything
+        inputs["callback_on_step_end"] = callback_inputs_all
+        inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs
+        output = pipe(**inputs)[0]
+
+        def callback_inputs_change_tensor(pipe, i, t, callback_kwargs):
+            is_last = i == (pipe.num_timesteps - 1)
+            if is_last:
+                callback_kwargs["latents"] = torch.zeros_like(callback_kwargs["latents"])
+            return callback_kwargs
+
+        inputs["callback_on_step_end"] = callback_inputs_change_tensor
+        inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs
+        output = pipe(**inputs)[0]
+        assert output.abs().sum() < 1e10
+
+    def test_attention_slicing_forward_pass(
+        self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3
+    ):
+        if not self.test_attention_slicing:
+            return
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        generator_device = "cpu"
+        inputs = self.get_dummy_inputs(generator_device)
+        output_without_slicing = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=1)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing1 = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=2)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing2 = pipe(**inputs)[0]
+
+        if test_max_difference:
+            max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max()
+            max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max()
+            self.assertLess(
+                max(max_diff1, max_diff2),
+                expected_max_diff,
+                "Attention slicing should not affect the inference results",
+            )
+
+    def test_vae_tiling(self, expected_diff_max: float = 0.2):
+        generator_device = "cpu"
+        components = self.get_dummy_components()
+
+        pipe = self.pipeline_class(**components)
+        pipe.to("cpu")
+        pipe.set_progress_bar_config(disable=None)
+
+        # Without tiling
+        inputs = self.get_dummy_inputs(generator_device)
+        inputs["height"] = inputs["width"] = 128
+        output_without_tiling = pipe(**inputs)[0]
+
+        # With tiling
+        pipe.vae.enable_tiling(
+            tile_sample_min_height=96,
+            tile_sample_min_width=96,
+            tile_sample_stride_height=64,
+            tile_sample_stride_width=64,
+        )
+        inputs = self.get_dummy_inputs(generator_device)
+        inputs["height"] = inputs["width"] = 128
+        output_with_tiling = pipe(**inputs)[0]
+
+        self.assertLess(
+            (to_np(output_without_tiling) - to_np(output_with_tiling)).max(),
+            expected_diff_max,
+            "VAE tiling should not affect the inference results",
+        )
+
+    # TODO(aryan): Create a dummy gemma model with smol vocab size
+    @unittest.skip(
+        "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."
+    )
+    def test_inference_batch_consistent(self):
+        pass
+
+    @unittest.skip(
+        "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."
+    )
+    def test_inference_batch_single_identical(self):
+        pass
+
+    def test_float16_inference(self):
+        # Requires a higher tolerance, as the model seems very sensitive to dtype
+        super().test_float16_inference(expected_max_diff=0.08)
diff --git a/pythonProject/diffusers-main/tests/pipelines/sana/test_sana_sprint_img2img.py b/pythonProject/diffusers-main/tests/pipelines/sana/test_sana_sprint_img2img.py
new file mode 100644
index 0000000000000000000000000000000000000000..c218abb8e951bee50cce5d4c5a13c62edce7575a
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/pipelines/sana/test_sana_sprint_img2img.py
@@ -0,0 +1,314 @@
+# Copyright 2025 The HuggingFace Team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
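+
+# Image-to-image variant of the Sprint fast test: get_dummy_inputs() adds an
+# `image` tensor and a `strength` value, and the tester param sets switch to
+# the TEXT_GUIDED_IMAGE_VARIATION / IMAGE_TO_IMAGE groups accordingly.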
+ +import inspect +import unittest + +import numpy as np +import torch +from transformers import Gemma2Config, Gemma2Model, GemmaTokenizer + +from diffusers import AutoencoderDC, SanaSprintImg2ImgPipeline, SanaTransformer2DModel, SCMScheduler +from diffusers.utils.torch_utils import randn_tensor + +from ...testing_utils import ( + enable_full_determinism, + torch_device, +) +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, +) +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class SanaSprintImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SanaSprintImg2ImgPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - { + "negative_prompt", + "negative_prompt_embeds", + } + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS - {"negative_prompt"} + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = SanaTransformer2DModel( + patch_size=1, + in_channels=4, + out_channels=4, + num_layers=1, + num_attention_heads=2, + attention_head_dim=4, + num_cross_attention_heads=2, + cross_attention_head_dim=4, + cross_attention_dim=8, + caption_channels=8, + sample_size=32, + qk_norm="rms_norm_across_heads", + guidance_embeds=True, + ) + + torch.manual_seed(0) + vae = AutoencoderDC( + in_channels=3, + latent_channels=4, + attention_head_dim=2, + encoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + decoder_block_types=( + "ResBlock", + "EfficientViTBlock", + ), + encoder_block_out_channels=(8, 8), + decoder_block_out_channels=(8, 8), + encoder_qkv_multiscales=((), (5,)), + decoder_qkv_multiscales=((), (5,)), + encoder_layers_per_block=(1, 1), + decoder_layers_per_block=[1, 1], + downsample_block_type="conv", + upsample_block_type="interpolate", + decoder_norm_types="rms_norm", + decoder_act_fns="silu", + scaling_factor=0.41407, + ) + + torch.manual_seed(0) + scheduler = SCMScheduler() + + torch.manual_seed(0) + text_encoder_config = Gemma2Config( + head_dim=16, + hidden_size=8, + initializer_range=0.02, + intermediate_size=64, + max_position_embeddings=8192, + model_type="gemma2", + num_attention_heads=2, + num_hidden_layers=1, + num_key_value_heads=2, + vocab_size=8, + attn_implementation="eager", + ) + text_encoder = Gemma2Model(text_encoder_config) + tokenizer = GemmaTokenizer.from_pretrained("hf-internal-testing/dummy-gemma") + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + image = randn_tensor((1, 3, 32, 32), generator=generator, device=device) + inputs = { + "prompt": "", + "image": image, + "strength": 0.5, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "pt", + "complex_human_instruction": None, 
+        }
+        return inputs
+
+    def test_inference(self):
+        device = "cpu"
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe.to(device)
+        pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = pipe(**inputs)[0]
+        generated_image = image[0]
+
+        self.assertEqual(generated_image.shape, (3, 32, 32))
+        expected_image = torch.randn(3, 32, 32)
+        max_diff = np.abs(generated_image - expected_image).max()
+        self.assertLessEqual(max_diff, 1e10)
+
+    def test_callback_inputs(self):
+        sig = inspect.signature(self.pipeline_class.__call__)
+        has_callback_tensor_inputs = "callback_on_step_end_tensor_inputs" in sig.parameters
+        has_callback_step_end = "callback_on_step_end" in sig.parameters
+
+        if not (has_callback_tensor_inputs and has_callback_step_end):
+            return
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        self.assertTrue(
+            hasattr(pipe, "_callback_tensor_inputs"),
+            f"{self.pipeline_class} should have `_callback_tensor_inputs` that defines a list of tensor variables its callback function can use as inputs",
+        )
+
+        def callback_inputs_subset(pipe, i, t, callback_kwargs):
+            # iterate over callback args
+            for tensor_name, tensor_value in callback_kwargs.items():
+                # check that we're only passing in allowed tensor inputs
+                assert tensor_name in pipe._callback_tensor_inputs
+
+            return callback_kwargs
+
+        def callback_inputs_all(pipe, i, t, callback_kwargs):
+            for tensor_name in pipe._callback_tensor_inputs:
+                assert tensor_name in callback_kwargs
+
+            # iterate over callback args
+            for tensor_name, tensor_value in callback_kwargs.items():
+                # check that we're only passing in allowed tensor inputs
+                assert tensor_name in pipe._callback_tensor_inputs
+
+            return callback_kwargs
+
+        inputs = self.get_dummy_inputs(torch_device)
+
+        # Test passing in a subset
+        inputs["callback_on_step_end"] = callback_inputs_subset
+        inputs["callback_on_step_end_tensor_inputs"] = ["latents"]
+        output = pipe(**inputs)[0]
+
+        # Test passing in everything
+        inputs["callback_on_step_end"] = callback_inputs_all
+        inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs
+        output = pipe(**inputs)[0]
+
+        def callback_inputs_change_tensor(pipe, i, t, callback_kwargs):
+            is_last = i == (pipe.num_timesteps - 1)
+            if is_last:
+                callback_kwargs["latents"] = torch.zeros_like(callback_kwargs["latents"])
+            return callback_kwargs
+
+        inputs["callback_on_step_end"] = callback_inputs_change_tensor
+        inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs
+        output = pipe(**inputs)[0]
+        assert output.abs().sum() < 1e10
+
+    def test_attention_slicing_forward_pass(
+        self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3
+    ):
+        if not self.test_attention_slicing:
+            return
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        generator_device = "cpu"
+        inputs = self.get_dummy_inputs(generator_device)
+        output_without_slicing = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=1)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing1 = pipe(**inputs)[0]
+
+        pipe.enable_attention_slicing(slice_size=2)
+        inputs = self.get_dummy_inputs(generator_device)
+        output_with_slicing2 = pipe(**inputs)[0]
+
+        if test_max_difference:
+            max_diff1 = np.abs(to_np(output_with_slicing1) - to_np(output_without_slicing)).max()
+            max_diff2 = np.abs(to_np(output_with_slicing2) - to_np(output_without_slicing)).max()
+            self.assertLess(
+                max(max_diff1, max_diff2),
+                expected_max_diff,
+                "Attention slicing should not affect the inference results",
+            )
+
+    @unittest.skip("vae tiling resulted in a small margin over the expected max diff, so skipping this test for now")
+    def test_vae_tiling(self, expected_diff_max: float = 0.2):
+        generator_device = "cpu"
+        components = self.get_dummy_components()
+
+        pipe = self.pipeline_class(**components)
+        pipe.to("cpu")
+        pipe.set_progress_bar_config(disable=None)
+
+        # Without tiling
+        inputs = self.get_dummy_inputs(generator_device)
+        inputs["height"] = inputs["width"] = 128
+        output_without_tiling = pipe(**inputs)[0]
+
+        # With tiling
+        pipe.vae.enable_tiling(
+            tile_sample_min_height=96,
+            tile_sample_min_width=96,
+            tile_sample_stride_height=64,
+            tile_sample_stride_width=64,
+        )
+        inputs = self.get_dummy_inputs(generator_device)
+        inputs["height"] = inputs["width"] = 128
+        output_with_tiling = pipe(**inputs)[0]
+
+        self.assertLess(
+            (to_np(output_without_tiling) - to_np(output_with_tiling)).max(),
+            expected_diff_max,
+            "VAE tiling should not affect the inference results",
+        )
+
+    # TODO(aryan): Create a dummy gemma model with smol vocab size
+    @unittest.skip(
+        "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."
+    )
+    def test_inference_batch_consistent(self):
+        pass
+
+    @unittest.skip(
+        "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."
+    )
+    def test_inference_batch_single_identical(self):
+        pass
+
+    def test_float16_inference(self):
+        # Requires a higher tolerance, as the model seems very sensitive to dtype
+        super().test_float16_inference(expected_max_diff=0.08)
diff --git a/pythonProject/diffusers-main/tests/pipelines/shap_e/__init__.py b/pythonProject/diffusers-main/tests/pipelines/shap_e/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pythonProject/diffusers-main/tests/pipelines/shap_e/test_shap_e.py b/pythonProject/diffusers-main/tests/pipelines/shap_e/test_shap_e.py
new file mode 100644
index 0000000000000000000000000000000000000000..99fd286929818aff5821d79ddebae70c6b48588b
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/pipelines/shap_e/test_shap_e.py
@@ -0,0 +1,267 @@
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
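+
+# Unlike the image pipelines above, these Shap-E fast tests request
+# output_type="latent", so the asserted shape (32, 16) refers to latents
+# rather than decoded frames, and the dummy modules are exposed as properties
+# instead of being built inline in get_dummy_components().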
+ +import gc +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer + +from diffusers import HeunDiscreteScheduler, PriorTransformer, ShapEPipeline +from diffusers.pipelines.shap_e import ShapERenderer + +from ...testing_utils import ( + backend_empty_cache, + load_numpy, + nightly, + require_torch_accelerator, + torch_device, +) +from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference + + +class ShapEPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = ShapEPipeline + params = ["prompt"] + batch_params = ["prompt"] + required_optional_params = [ + "num_images_per_prompt", + "num_inference_steps", + "generator", + "latents", + "guidance_scale", + "frame_size", + "output_type", + "return_dict", + ] + test_xformers_attention = False + + @property + def text_embedder_hidden_size(self): + return 16 + + @property + def time_input_dim(self): + return 16 + + @property + def time_embed_dim(self): + return self.time_input_dim * 4 + + @property + def renderer_dim(self): + return 8 + + @property + def dummy_tokenizer(self): + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + return tokenizer + + @property + def dummy_text_encoder(self): + torch.manual_seed(0) + config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=self.text_embedder_hidden_size, + projection_dim=self.text_embedder_hidden_size, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + return CLIPTextModelWithProjection(config) + + @property + def dummy_prior(self): + torch.manual_seed(0) + + model_kwargs = { + "num_attention_heads": 2, + "attention_head_dim": 16, + "embedding_dim": self.time_input_dim, + "num_embeddings": 32, + "embedding_proj_dim": self.text_embedder_hidden_size, + "time_embed_dim": self.time_embed_dim, + "num_layers": 1, + "clip_embed_dim": self.time_input_dim * 2, + "additional_embeddings": 0, + "time_embed_act_fn": "gelu", + "norm_in_type": "layer", + "encoder_hid_proj_type": None, + "added_emb_type": None, + } + + model = PriorTransformer(**model_kwargs) + return model + + @property + def dummy_renderer(self): + torch.manual_seed(0) + + model_kwargs = { + "param_shapes": ( + (self.renderer_dim, 93), + (self.renderer_dim, 8), + (self.renderer_dim, 8), + (self.renderer_dim, 8), + ), + "d_latent": self.time_input_dim, + "d_hidden": self.renderer_dim, + "n_output": 12, + "background": ( + 0.1, + 0.1, + 0.1, + ), + } + model = ShapERenderer(**model_kwargs) + return model + + def get_dummy_components(self): + prior = self.dummy_prior + text_encoder = self.dummy_text_encoder + tokenizer = self.dummy_tokenizer + shap_e_renderer = self.dummy_renderer + + scheduler = HeunDiscreteScheduler( + beta_schedule="exp", + num_train_timesteps=1024, + prediction_type="sample", + use_karras_sigmas=True, + clip_sample=True, + clip_sample_range=1.0, + ) + components = { + "prior": prior, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "shap_e_renderer": shap_e_renderer, + "scheduler": scheduler, + } + + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "horse", + "generator": generator, + "num_inference_steps": 1, + "frame_size": 32, + "output_type": "latent", + } + return 
inputs + + def test_shap_e(self): + device = "cpu" + + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + + pipe.set_progress_bar_config(disable=None) + + output = pipe(**self.get_dummy_inputs(device)) + image = output.images[0] + image = image.cpu().numpy() + image_slice = image[-3:, -3:] + + assert image.shape == (32, 16) + + expected_slice = np.array([-1.0000, -0.6559, 1.0000, -0.9096, -0.7252, 0.8211, -0.7647, -0.3308, 0.6462]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_inference_batch_consistent(self): + # NOTE: Larger batch sizes cause this test to timeout, only test on smaller batches + self._test_inference_batch_consistent(batch_sizes=[1, 2]) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(batch_size=2, expected_max_diff=6e-3) + + def test_num_images_per_prompt(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + batch_size = 1 + num_images_per_prompt = 2 + + inputs = self.get_dummy_inputs(torch_device) + + for key in inputs.keys(): + if key in self.batch_params: + inputs[key] = batch_size * [inputs[key]] + + images = pipe(**inputs, num_images_per_prompt=num_images_per_prompt)[0] + + assert images.shape[0] == batch_size * num_images_per_prompt + + def test_float16_inference(self): + super().test_float16_inference(expected_max_diff=5e-1) + + def test_save_load_local(self): + super().test_save_load_local(expected_max_difference=5e-3) + + @unittest.skip("Key error is raised with accelerate") + def test_sequential_cpu_offload_forward_pass(self): + pass + + +@nightly +@require_torch_accelerator +class ShapEPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_shap_e(self): + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/shap_e/test_shap_e_np_out.npy" + ) + pipe = ShapEPipeline.from_pretrained("openai/shap-e") + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator = torch.Generator(device=torch_device).manual_seed(0) + + images = pipe( + "a shark", + generator=generator, + guidance_scale=15.0, + num_inference_steps=64, + frame_size=64, + output_type="np", + ).images[0] + + assert images.shape == (20, 64, 64, 3) + + assert_mean_pixel_difference(images, expected_image) diff --git a/pythonProject/diffusers-main/tests/pipelines/shap_e/test_shap_e_img2img.py b/pythonProject/diffusers-main/tests/pipelines/shap_e/test_shap_e_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..b1867db249ea69f5b8b3158f3e7b5c38e18ba55f --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/shap_e/test_shap_e_img2img.py @@ -0,0 +1,293 @@ +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import random +import unittest + +import numpy as np +import torch +from transformers import CLIPImageProcessor, CLIPVisionConfig, CLIPVisionModel + +from diffusers import HeunDiscreteScheduler, PriorTransformer, ShapEImg2ImgPipeline +from diffusers.pipelines.shap_e import ShapERenderer + +from ...testing_utils import ( + backend_empty_cache, + floats_tensor, + load_image, + load_numpy, + nightly, + require_torch_accelerator, + torch_device, +) +from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference + + +class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = ShapEImg2ImgPipeline + params = ["image"] + batch_params = ["image"] + required_optional_params = [ + "num_images_per_prompt", + "num_inference_steps", + "generator", + "latents", + "guidance_scale", + "frame_size", + "output_type", + "return_dict", + ] + test_xformers_attention = False + + supports_dduf = False + + @property + def text_embedder_hidden_size(self): + return 16 + + @property + def time_input_dim(self): + return 16 + + @property + def time_embed_dim(self): + return self.time_input_dim * 4 + + @property + def renderer_dim(self): + return 8 + + @property + def dummy_image_encoder(self): + torch.manual_seed(0) + config = CLIPVisionConfig( + hidden_size=self.text_embedder_hidden_size, + image_size=32, + projection_dim=self.text_embedder_hidden_size, + intermediate_size=24, + num_attention_heads=2, + num_channels=3, + num_hidden_layers=5, + patch_size=1, + ) + + model = CLIPVisionModel(config) + return model + + @property + def dummy_image_processor(self): + image_processor = CLIPImageProcessor( + crop_size=224, + do_center_crop=True, + do_normalize=True, + do_resize=True, + image_mean=[0.48145466, 0.4578275, 0.40821073], + image_std=[0.26862954, 0.26130258, 0.27577711], + resample=3, + size=224, + ) + + return image_processor + + @property + def dummy_prior(self): + torch.manual_seed(0) + + model_kwargs = { + "num_attention_heads": 2, + "attention_head_dim": 16, + "embedding_dim": self.time_input_dim, + "num_embeddings": 32, + "embedding_proj_dim": self.text_embedder_hidden_size, + "time_embed_dim": self.time_embed_dim, + "num_layers": 1, + "clip_embed_dim": self.time_input_dim * 2, + "additional_embeddings": 0, + "time_embed_act_fn": "gelu", + "norm_in_type": "layer", + "embedding_proj_norm_type": "layer", + "encoder_hid_proj_type": None, + "added_emb_type": None, + } + + model = PriorTransformer(**model_kwargs) + return model + + @property + def dummy_renderer(self): + torch.manual_seed(0) + + model_kwargs = { + "param_shapes": ( + (self.renderer_dim, 93), + (self.renderer_dim, 8), + (self.renderer_dim, 8), + (self.renderer_dim, 8), + ), + "d_latent": self.time_input_dim, + "d_hidden": self.renderer_dim, + "n_output": 12, + "background": ( + 0.1, + 0.1, + 0.1, + ), + } + model = ShapERenderer(**model_kwargs) + return model + + def get_dummy_components(self): + prior = self.dummy_prior + image_encoder = self.dummy_image_encoder + image_processor = self.dummy_image_processor + shap_e_renderer = self.dummy_renderer + + scheduler = 
HeunDiscreteScheduler( + beta_schedule="exp", + num_train_timesteps=1024, + prediction_type="sample", + use_karras_sigmas=True, + clip_sample=True, + clip_sample_range=1.0, + ) + components = { + "prior": prior, + "image_encoder": image_encoder, + "image_processor": image_processor, + "shap_e_renderer": shap_e_renderer, + "scheduler": scheduler, + } + + return components + + def get_dummy_inputs(self, device, seed=0): + input_image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "image": input_image, + "generator": generator, + "num_inference_steps": 1, + "frame_size": 32, + "output_type": "latent", + } + return inputs + + def test_shap_e(self): + device = "cpu" + + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + + pipe.set_progress_bar_config(disable=None) + + output = pipe(**self.get_dummy_inputs(device)) + image = output.images[0] + image_slice = image[-3:, -3:].cpu().numpy() + + assert image.shape == (32, 16) + + expected_slice = np.array( + [-1.0, 0.40668195, 0.57322013, -0.9469888, 0.4283227, 0.30348337, -0.81094897, 0.74555075, 0.15342723] + ) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_inference_batch_consistent(self): + # NOTE: Larger batch sizes cause this test to timeout, only test on smaller batches + self._test_inference_batch_consistent(batch_sizes=[2]) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical( + batch_size=2, + expected_max_diff=6e-3, + ) + + def test_num_images_per_prompt(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + batch_size = 1 + num_images_per_prompt = 2 + + inputs = self.get_dummy_inputs(torch_device) + + for key in inputs.keys(): + if key in self.batch_params: + inputs[key] = batch_size * [inputs[key]] + + images = pipe(**inputs, num_images_per_prompt=num_images_per_prompt)[0] + + assert images.shape[0] == batch_size * num_images_per_prompt + + def test_float16_inference(self): + super().test_float16_inference(expected_max_diff=1e-1) + + def test_save_load_local(self): + super().test_save_load_local(expected_max_difference=5e-3) + + @unittest.skip("Key error is raised with accelerate") + def test_sequential_cpu_offload_forward_pass(self): + pass + + +@nightly +@require_torch_accelerator +class ShapEImg2ImgPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_shap_e_img2img(self): + input_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/shap_e/corgi.png" + ) + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/shap_e/test_shap_e_img2img_out.npy" + ) + pipe = ShapEImg2ImgPipeline.from_pretrained("openai/shap-e-img2img") + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator = torch.Generator(device=torch_device).manual_seed(0) + + images = pipe( + input_image, + generator=generator, + 
guidance_scale=3.0, + num_inference_steps=64, + frame_size=64, + output_type="np", + ).images[0] + + assert images.shape == (20, 64, 64, 3) + + assert_mean_pixel_difference(images, expected_image) diff --git a/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/__init__.py b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2.py b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..1bcec877c30d58ceb773e7eaddb5407049a97ee0 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2.py @@ -0,0 +1,137 @@ +# Copyright 2024 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import ( + AutoencoderKLWan, + SkyReelsV2Pipeline, + SkyReelsV2Transformer3DModel, + UniPCMultistepScheduler, +) + +from ...testing_utils import ( + enable_full_determinism, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import ( + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class SkyReelsV2PipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SkyReelsV2Pipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(flow_shift=8.0, use_flow_sigmas=True) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = SkyReelsV2Transformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if 
str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "height": 16, + "width": 16, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + expected_video = torch.randn(9, 3, 16, 16) + max_diff = np.abs(generated_video - expected_video).max() + self.assertLessEqual(max_diff, 1e10) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_df.py b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_df.py new file mode 100644 index 0000000000000000000000000000000000000000..74235d59efd6e298c0f49f95cbb45abf44f1fa2e --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_df.py @@ -0,0 +1,137 @@ +# Copyright 2024 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
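[Editorial sketch, not part of the patch.] Every `get_dummy_inputs` in these test files repeats the same device-aware generator construction seen above. A minimal standalone sketch of that shared pattern (the `_dummy_generator` helper name is ours; the branch exists presumably because `torch.Generator` historically could not be constructed for the `mps` backend, so the tests seed the global RNG there instead):

    import torch

    def _dummy_generator(device, seed=0):
        # On "mps" fall back to seeding the global RNG; torch.manual_seed
        # returns the default CPU torch.Generator, which the pipelines accept.
        if str(device).startswith("mps"):
            return torch.manual_seed(seed)
        return torch.Generator(device=device).manual_seed(seed)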
+ +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import ( + AutoencoderKLWan, + SkyReelsV2DiffusionForcingPipeline, + SkyReelsV2Transformer3DModel, + UniPCMultistepScheduler, +) + +from ...testing_utils import ( + enable_full_determinism, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import ( + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class SkyReelsV2DiffusionForcingPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SkyReelsV2DiffusionForcingPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(flow_shift=8.0, use_flow_sigmas=True) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = SkyReelsV2Transformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "height": 16, + "width": 16, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + expected_video = torch.randn(9, 3, 16, 16) + max_diff = np.abs(generated_video - expected_video).max() + self.assertLessEqual(max_diff, 1e10) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_df_image_to_video.py b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_df_image_to_video.py new file mode 100644 index 0000000000000000000000000000000000000000..f0cbc710df05b92be44bf54ddf3f71c513d90920 --- /dev/null +++ 
b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_df_image_to_video.py @@ -0,0 +1,215 @@ +# Copyright 2024 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import ( + AutoTokenizer, + T5EncoderModel, +) + +from diffusers import ( + AutoencoderKLWan, + SkyReelsV2DiffusionForcingImageToVideoPipeline, + SkyReelsV2Transformer3DModel, + UniPCMultistepScheduler, +) + +from ...testing_utils import enable_full_determinism +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class SkyReelsV2DiffusionForcingImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SkyReelsV2DiffusionForcingImageToVideoPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs", "height", "width"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = SkyReelsV2Transformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + image_dim=4, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + image_height = 16 + image_width = 16 + image = Image.new("RGB", (image_width, image_height)) + inputs = { + "image": image, + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "height": image_height, + "width": image_width, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + 
components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + expected_video = torch.randn(9, 3, 16, 16) + max_diff = np.abs(generated_video - expected_video).max() + self.assertLessEqual(max_diff, 1e10) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + @unittest.skip("TODO: revisit failing as it requires a very high threshold to pass") + def test_inference_batch_single_identical(self): + pass + + +class SkyReelsV2DiffusionForcingFLFToVideoPipelineFastTests(SkyReelsV2DiffusionForcingImageToVideoPipelineFastTests): + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = SkyReelsV2Transformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + image_dim=4, + pos_embed_seq_len=2 * (4 * 4 + 1), + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + image_height = 16 + image_width = 16 + image = Image.new("RGB", (image_width, image_height)) + last_image = Image.new("RGB", (image_width, image_height)) + inputs = { + "image": image, + "last_image": last_image, + "prompt": "dance monkey", + "negative_prompt": "negative", + "height": image_height, + "width": image_width, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs diff --git a/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_df_video_to_video.py b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_df_video_to_video.py new file mode 100644 index 0000000000000000000000000000000000000000..1b0b23318e633753423c51c052322cc64bffe6d1 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_df_video_to_video.py @@ -0,0 +1,201 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import ( + AutoencoderKLWan, + SkyReelsV2DiffusionForcingVideoToVideoPipeline, + SkyReelsV2Transformer3DModel, + UniPCMultistepScheduler, +) + +from ...testing_utils import ( + enable_full_determinism, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import ( + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class SkyReelsV2DiffusionForcingVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SkyReelsV2DiffusionForcingVideoToVideoPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = frozenset(["video", "prompt", "negative_prompt"]) + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = SkyReelsV2Transformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + video = [Image.new("RGB", (16, 16))] * 7 + inputs = { + "video": video, + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "generator": generator, + "num_inference_steps": 4, + "guidance_scale": 6.0, + "height": 16, + "width": 16, + "max_sequence_length": 16, + "output_type": "pt", + "overlap_history": 3, + "num_frames": 17, + "base_num_frames": 5, + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + + total_frames = len(inputs["video"]) + inputs["num_frames"] + expected_shape = (total_frames, 3, 16, 16) + self.assertEqual(generated_video.shape, expected_shape) + expected_video = torch.randn(*expected_shape) + max_diff = np.abs(generated_video - expected_video).max() + self.assertLessEqual(max_diff, 1e10) + + def test_callback_cfg(self): + sig = inspect.signature(self.pipeline_class.__call__) + 
has_callback_tensor_inputs = "callback_on_step_end_tensor_inputs" in sig.parameters + has_callback_step_end = "callback_on_step_end" in sig.parameters + + if not (has_callback_tensor_inputs and has_callback_step_end): + return + + if "guidance_scale" not in sig.parameters: + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + self.assertTrue( + hasattr(pipe, "_callback_tensor_inputs"), + f" {self.pipeline_class} should have `_callback_tensor_inputs` that defines a list of tensor variables its callback function can use as inputs", + ) + + # Track the number of callback calls for diffusion forcing pipelines + callback_call_count = [0] # Use list to make it mutable in closure + + def callback_increase_guidance(pipe, i, t, callback_kwargs): + pipe._guidance_scale += 1.0 + callback_call_count[0] += 1 + return callback_kwargs + + inputs = self.get_dummy_inputs(torch_device) + + # use cfg guidance because some pipelines modify the shape of the latents + # outside of the denoising loop + inputs["guidance_scale"] = 2.0 + inputs["callback_on_step_end"] = callback_increase_guidance + inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs + _ = pipe(**inputs)[0] + + # For diffusion forcing pipelines, use the actual callback count + # since they run multiple iterations with nested denoising loops + expected_guidance_scale = inputs["guidance_scale"] + callback_call_count[0] + + assert pipe.guidance_scale == expected_guidance_scale + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + @unittest.skip( + "SkyReelsV2DiffusionForcingVideoToVideoPipeline has to run in mixed precision. Casting the entire pipeline will result in errors" + ) + def test_float16_inference(self): + pass + + @unittest.skip( + "SkyReelsV2DiffusionForcingVideoToVideoPipeline has to run in mixed precision. Save/Load the entire pipeline in FP16 will result in errors" + ) + def test_save_load_float16(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_image_to_video.py b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_image_to_video.py new file mode 100644 index 0000000000000000000000000000000000000000..784f701a29d2cf1ac81877bae69dc8b199729455 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/skyreels_v2/test_skyreels_v2_image_to_video.py @@ -0,0 +1,220 @@ +# Copyright 2024 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
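[Editorial sketch, not part of the patch.] The `test_callback_cfg` override above counts callback invocations because diffusion-forcing pipelines denoise in several windowed iterations, so `callback_on_step_end` fires more often than the nominal `num_inference_steps`. A stripped-down sketch of the same callback contract, written as an extra method for the video-to-video fast-test class above (the method name and final assertion are ours):

    def test_callback_counts_steps(self):
        pipe = self.pipeline_class(**self.get_dummy_components()).to("cpu")
        pipe.set_progress_bar_config(disable=None)

        calls = []

        def on_step_end(pipe, step_index, timestep, callback_kwargs):
            calls.append(step_index)
            # Callbacks must return the (possibly modified) tensor kwargs.
            return callback_kwargs

        inputs = self.get_dummy_inputs("cpu")
        inputs["callback_on_step_end"] = on_step_end
        inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs
        _ = pipe(**inputs).frames
        # With nested denoising loops, the callback can run more often than
        # the nominal step count.
        assert len(calls) >= inputs["num_inference_steps"]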
+ +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import ( + AutoTokenizer, + CLIPImageProcessor, + CLIPVisionConfig, + CLIPVisionModelWithProjection, + T5EncoderModel, +) + +from diffusers import ( + AutoencoderKLWan, + SkyReelsV2ImageToVideoPipeline, + SkyReelsV2Transformer3DModel, + UniPCMultistepScheduler, +) + +from ...testing_utils import enable_full_determinism +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class SkyReelsV2ImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = SkyReelsV2ImageToVideoPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs", "height", "width"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(flow_shift=5.0, use_flow_sigmas=True) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = SkyReelsV2Transformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=36, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + image_dim=4, + ) + + torch.manual_seed(0) + image_encoder_config = CLIPVisionConfig( + hidden_size=4, + projection_dim=4, + num_hidden_layers=2, + num_attention_heads=2, + image_size=32, + intermediate_size=16, + patch_size=1, + ) + image_encoder = CLIPVisionModelWithProjection(image_encoder_config) + + torch.manual_seed(0) + image_processor = CLIPImageProcessor(crop_size=32, size=32) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "image_encoder": image_encoder, + "image_processor": image_processor, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + image_height = 16 + image_width = 16 + image = Image.new("RGB", (image_width, image_height)) + inputs = { + "image": image, + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "height": image_height, + "width": image_width, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = 
video[0] + + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + expected_video = torch.randn(9, 3, 16, 16) + max_diff = np.abs(generated_video - expected_video).max() + self.assertLessEqual(max_diff, 1e10) + + def test_inference_with_last_image(self): + device = "cpu" + + components = self.get_dummy_components() + torch.manual_seed(0) + components["transformer"] = SkyReelsV2Transformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=36, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + pos_embed_seq_len=2 * (4 * 4 + 1), + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + image_dim=4, + ) + torch.manual_seed(0) + image_encoder_config = CLIPVisionConfig( + hidden_size=4, + projection_dim=4, + num_hidden_layers=2, + num_attention_heads=2, + image_size=4, + intermediate_size=16, + patch_size=1, + ) + components["image_encoder"] = CLIPVisionModelWithProjection(image_encoder_config) + + torch.manual_seed(0) + components["image_processor"] = CLIPImageProcessor(crop_size=4, size=4) + + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image_height = 16 + image_width = 16 + last_image = Image.new("RGB", (image_width, image_height)) + inputs["last_image"] = last_image + + video = pipe(**inputs).frames + generated_video = video[0] + + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + expected_video = torch.randn(9, 3, 16, 16) + max_diff = np.abs(generated_video - expected_video).max() + self.assertLessEqual(max_diff, 1e10) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + @unittest.skip("TODO: revisit failing as it requires a very high threshold to pass") + def test_inference_batch_single_identical(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_audio/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_audio/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_audio/test_stable_audio.py b/pythonProject/diffusers-main/tests/pipelines/stable_audio/test_stable_audio.py new file mode 100644 index 0000000000000000000000000000000000000000..dd03f4d07f07c3e860953b8e1b810640fab9a09a --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_audio/test_stable_audio.py @@ -0,0 +1,480 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import gc +import unittest + +import numpy as np +import torch +from transformers import ( + T5EncoderModel, + T5Tokenizer, +) + +from diffusers import ( + AutoencoderOobleck, + CosineDPMSolverMultistepScheduler, + StableAudioDiTModel, + StableAudioPipeline, + StableAudioProjectionModel, +) +from diffusers.utils import is_xformers_available + +from ...testing_utils import ( + Expectations, + backend_empty_cache, + enable_full_determinism, + nightly, + require_torch_accelerator, + torch_device, +) +from ..pipeline_params import TEXT_TO_AUDIO_BATCH_PARAMS +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class StableAudioPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = StableAudioPipeline + params = frozenset( + [ + "prompt", + "audio_end_in_s", + "audio_start_in_s", + "guidance_scale", + "negative_prompt", + "prompt_embeds", + "negative_prompt_embeds", + "initial_audio_waveforms", + ] + ) + batch_params = TEXT_TO_AUDIO_BATCH_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "num_waveforms_per_prompt", + "generator", + "latents", + "output_type", + "return_dict", + "callback", + "callback_steps", + ] + ) + # There is not xformers version of the StableAudioPipeline custom attention processor + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = StableAudioDiTModel( + sample_size=4, + in_channels=3, + num_layers=2, + attention_head_dim=4, + num_key_value_attention_heads=2, + out_channels=3, + cross_attention_dim=4, + time_proj_dim=8, + global_states_input_dim=8, + cross_attention_input_dim=4, + ) + scheduler = CosineDPMSolverMultistepScheduler( + solver_order=2, + prediction_type="v_prediction", + sigma_data=1.0, + sigma_schedule="exponential", + ) + torch.manual_seed(0) + vae = AutoencoderOobleck( + encoder_hidden_size=6, + downsampling_ratios=[1, 2], + decoder_channels=3, + decoder_input_channels=3, + audio_channels=2, + channel_multiples=[2, 4], + sampling_rate=4, + ) + torch.manual_seed(0) + t5_repo_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" + text_encoder = T5EncoderModel.from_pretrained(t5_repo_id) + tokenizer = T5Tokenizer.from_pretrained(t5_repo_id, truncation=True, model_max_length=25) + + torch.manual_seed(0) + projection_model = StableAudioProjectionModel( + text_encoder_dim=text_encoder.config.d_model, + conditioning_dim=4, + min_value=0, + max_value=32, + ) + + components = { + "transformer": transformer, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "projection_model": projection_model, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A hammer hitting a wooden surface", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + } + return inputs + + def test_save_load_local(self): + # increase tolerance from 1e-4 -> 7e-3 to account for large composite model + super().test_save_load_local(expected_max_difference=7e-3) + + def test_save_load_optional_components(self): + # increase tolerance from 1e-4 -> 7e-3 to account for large composite model + super().test_save_load_optional_components(expected_max_difference=7e-3) + + def test_stable_audio_ddim(self): + device = "cpu" # ensure determinism for the 
device-dependent torch.Generator + + components = self.get_dummy_components() + stable_audio_pipe = StableAudioPipeline(**components) + stable_audio_pipe = stable_audio_pipe.to(torch_device) + stable_audio_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = stable_audio_pipe(**inputs) + audio = output.audios[0] + + assert audio.ndim == 2 + assert audio.shape == (2, 7) + + def test_stable_audio_without_prompts(self): + components = self.get_dummy_components() + stable_audio_pipe = StableAudioPipeline(**components) + stable_audio_pipe = stable_audio_pipe.to(torch_device) + stable_audio_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + inputs["prompt"] = 3 * [inputs["prompt"]] + + # forward + output = stable_audio_pipe(**inputs) + audio_1 = output.audios[0] + + inputs = self.get_dummy_inputs(torch_device) + prompt = 3 * [inputs.pop("prompt")] + + text_inputs = stable_audio_pipe.tokenizer( + prompt, + padding="max_length", + max_length=stable_audio_pipe.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ).to(torch_device) + text_input_ids = text_inputs.input_ids + attention_mask = text_inputs.attention_mask + + prompt_embeds = stable_audio_pipe.text_encoder( + text_input_ids, + attention_mask=attention_mask, + )[0] + + inputs["prompt_embeds"] = prompt_embeds + inputs["attention_mask"] = attention_mask + + # forward + output = stable_audio_pipe(**inputs) + audio_2 = output.audios[0] + + assert (audio_1 - audio_2).abs().max() < 1e-2 + + def test_stable_audio_negative_without_prompts(self): + components = self.get_dummy_components() + stable_audio_pipe = StableAudioPipeline(**components) + stable_audio_pipe = stable_audio_pipe.to(torch_device) + stable_audio_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + negative_prompt = 3 * ["this is a negative prompt"] + inputs["negative_prompt"] = negative_prompt + inputs["prompt"] = 3 * [inputs["prompt"]] + + # forward + output = stable_audio_pipe(**inputs) + audio_1 = output.audios[0] + + inputs = self.get_dummy_inputs(torch_device) + prompt = 3 * [inputs.pop("prompt")] + + text_inputs = stable_audio_pipe.tokenizer( + prompt, + padding="max_length", + max_length=stable_audio_pipe.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ).to(torch_device) + text_input_ids = text_inputs.input_ids + attention_mask = text_inputs.attention_mask + + prompt_embeds = stable_audio_pipe.text_encoder( + text_input_ids, + attention_mask=attention_mask, + )[0] + + inputs["prompt_embeds"] = prompt_embeds + inputs["attention_mask"] = attention_mask + + negative_text_inputs = stable_audio_pipe.tokenizer( + negative_prompt, + padding="max_length", + max_length=stable_audio_pipe.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ).to(torch_device) + negative_text_input_ids = negative_text_inputs.input_ids + negative_attention_mask = negative_text_inputs.attention_mask + + negative_prompt_embeds = stable_audio_pipe.text_encoder( + negative_text_input_ids, + attention_mask=negative_attention_mask, + )[0] + + inputs["negative_prompt_embeds"] = negative_prompt_embeds + inputs["negative_attention_mask"] = negative_attention_mask + + # forward + output = stable_audio_pipe(**inputs) + audio_2 = output.audios[0] + + assert (audio_1 - audio_2).abs().max() < 1e-2 + + def test_stable_audio_negative_prompt(self): + device = "cpu" # ensure
determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + stable_audio_pipe = StableAudioPipeline(**components) + stable_audio_pipe = stable_audio_pipe.to(device) + stable_audio_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + negative_prompt = "egg cracking" + output = stable_audio_pipe(**inputs, negative_prompt=negative_prompt) + audio = output.audios[0] + + assert audio.ndim == 2 + assert audio.shape == (2, 7) + + def test_stable_audio_num_waveforms_per_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + stable_audio_pipe = StableAudioPipeline(**components) + stable_audio_pipe = stable_audio_pipe.to(device) + stable_audio_pipe.set_progress_bar_config(disable=None) + + prompt = "A hammer hitting a wooden surface" + + # test num_waveforms_per_prompt=1 (default) + audios = stable_audio_pipe(prompt, num_inference_steps=2).audios + + assert audios.shape == (1, 2, 7) + + # test num_waveforms_per_prompt=1 (default) for batch of prompts + batch_size = 2 + audios = stable_audio_pipe([prompt] * batch_size, num_inference_steps=2).audios + + assert audios.shape == (batch_size, 2, 7) + + # test num_waveforms_per_prompt for single prompt + num_waveforms_per_prompt = 2 + audios = stable_audio_pipe( + prompt, num_inference_steps=2, num_waveforms_per_prompt=num_waveforms_per_prompt + ).audios + + assert audios.shape == (num_waveforms_per_prompt, 2, 7) + + # test num_waveforms_per_prompt for batch of prompts + batch_size = 2 + audios = stable_audio_pipe( + [prompt] * batch_size, num_inference_steps=2, num_waveforms_per_prompt=num_waveforms_per_prompt + ).audios + + assert audios.shape == (batch_size * num_waveforms_per_prompt, 2, 7) + + def test_stable_audio_audio_end_in_s(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + stable_audio_pipe = StableAudioPipeline(**components) + stable_audio_pipe = stable_audio_pipe.to(torch_device) + stable_audio_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = stable_audio_pipe(audio_end_in_s=1.5, **inputs) + audio = output.audios[0] + + assert audio.ndim == 2 + assert audio.shape[1] / stable_audio_pipe.vae.sampling_rate == 1.5 + + output = stable_audio_pipe(audio_end_in_s=1.1875, **inputs) + audio = output.audios[0] + + assert audio.ndim == 2 + assert audio.shape[1] / stable_audio_pipe.vae.sampling_rate == 1.0 + + def test_attention_slicing_forward_pass(self): + self._test_attention_slicing_forward_pass(test_mean_pixel_difference=False) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=5e-4) + + @unittest.skipIf( + torch_device != "cuda" or not is_xformers_available(), + reason="XFormers attention is only available with CUDA and `xformers` installed", + ) + def test_xformers_attention_forwardGenerator_pass(self): + self._test_xformers_attention_forwardGenerator_pass(test_mean_pixel_difference=False) + + def test_stable_audio_input_waveform(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + stable_audio_pipe = StableAudioPipeline(**components) + stable_audio_pipe = stable_audio_pipe.to(device) + stable_audio_pipe.set_progress_bar_config(disable=None) + + prompt = "A hammer hitting a wooden surface" + + initial_audio_waveforms = 
torch.ones((1, 5)) + + # test raises error when no sampling rate + with self.assertRaises(ValueError): + audios = stable_audio_pipe( + prompt, num_inference_steps=2, initial_audio_waveforms=initial_audio_waveforms + ).audios + + # test raises error when wrong sampling rate + with self.assertRaises(ValueError): + audios = stable_audio_pipe( + prompt, + num_inference_steps=2, + initial_audio_waveforms=initial_audio_waveforms, + initial_audio_sampling_rate=stable_audio_pipe.vae.sampling_rate - 1, + ).audios + + audios = stable_audio_pipe( + prompt, + num_inference_steps=2, + initial_audio_waveforms=initial_audio_waveforms, + initial_audio_sampling_rate=stable_audio_pipe.vae.sampling_rate, + ).audios + assert audios.shape == (1, 2, 7) + + # test works with num_waveforms_per_prompt + num_waveforms_per_prompt = 2 + audios = stable_audio_pipe( + prompt, + num_inference_steps=2, + num_waveforms_per_prompt=num_waveforms_per_prompt, + initial_audio_waveforms=initial_audio_waveforms, + initial_audio_sampling_rate=stable_audio_pipe.vae.sampling_rate, + ).audios + + assert audios.shape == (num_waveforms_per_prompt, 2, 7) + + # test num_waveforms_per_prompt for batch of prompts and input audio (two channels) + batch_size = 2 + initial_audio_waveforms = torch.ones((batch_size, 2, 5)) + audios = stable_audio_pipe( + [prompt] * batch_size, + num_inference_steps=2, + num_waveforms_per_prompt=num_waveforms_per_prompt, + initial_audio_waveforms=initial_audio_waveforms, + initial_audio_sampling_rate=stable_audio_pipe.vae.sampling_rate, + ).audios + + assert audios.shape == (batch_size * num_waveforms_per_prompt, 2, 7) + + @unittest.skip("Not supported yet") + def test_sequential_cpu_offload_forward_pass(self): + pass + + @unittest.skip("Not supported yet") + def test_sequential_offload_forward_pass_twice(self): + pass + + @unittest.skip("Test not supported because `rotary_embed_dim` doesn't have any sensible default.") + def test_encode_prompt_works_in_isolation(self): + pass + + +@nightly +@require_torch_accelerator +class StableAudioPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + latents = np.random.RandomState(seed).standard_normal((1, 64, 1024)) + latents = torch.from_numpy(latents).to(device=device, dtype=dtype) + inputs = { + "prompt": "A hammer hitting a wooden surface", + "latents": latents, + "generator": generator, + "num_inference_steps": 3, + "audio_end_in_s": 30, + "guidance_scale": 2.5, + } + return inputs + + def test_stable_audio(self): + stable_audio_pipe = StableAudioPipeline.from_pretrained("stabilityai/stable-audio-open-1.0") + stable_audio_pipe = stable_audio_pipe.to(torch_device) + stable_audio_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + inputs["num_inference_steps"] = 25 + audio = stable_audio_pipe(**inputs).audios[0] + + assert audio.ndim == 2 + assert audio.shape == (2, int(inputs["audio_end_in_s"] * stable_audio_pipe.vae.sampling_rate)) + # check the portion of the generated audio with the largest dynamic range (reduces flakiness) + audio_slice = audio[0, 447590:447600] + # fmt: off + expected_slices = Expectations( + { + ("xpu", 3): np.array([-0.0285, 0.1083, 0.1863, 0.3165, 0.5312, 0.6971, 0.6958, 
0.6177, 0.5598, 0.5048]), + ("cuda", 7): np.array([-0.0278, 0.1096, 0.1877, 0.3178, 0.5329, 0.6990, 0.6972, 0.6186, 0.5608, 0.5060]), + ("cuda", 8): np.array([-0.0285, 0.1082, 0.1862, 0.3163, 0.5306, 0.6964, 0.6953, 0.6172, 0.5593, 0.5044]), + } + ) + # fmt: on + + expected_slice = expected_slices.get_expectation() + max_diff = np.abs(expected_slice - audio_slice.detach().cpu().numpy()).max() + assert max_diff < 1.5e-3 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_cascade/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_cascade/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_cascade/test_stable_cascade_combined.py b/pythonProject/diffusers-main/tests/pipelines/stable_cascade/test_stable_cascade_combined.py new file mode 100644 index 0000000000000000000000000000000000000000..afa0db39f3fa9f2125529c821dd060fdd910ead9 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_cascade/test_stable_cascade_combined.py @@ -0,0 +1,244 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer + +from diffusers import DDPMWuerstchenScheduler, StableCascadeCombinedPipeline +from diffusers.models import StableCascadeUNet +from diffusers.pipelines.wuerstchen import PaellaVQModel + +from ...testing_utils import enable_full_determinism, require_torch_accelerator, torch_device +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class StableCascadeCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = StableCascadeCombinedPipeline + params = ["prompt"] + batch_params = ["prompt", "negative_prompt"] + required_optional_params = [ + "generator", + "height", + "width", + "latents", + "prior_guidance_scale", + "decoder_guidance_scale", + "negative_prompt", + "num_inference_steps", + "return_dict", + "prior_num_inference_steps", + "output_type", + ] + test_xformers_attention = True + + @property + def text_embedder_hidden_size(self): + return 32 + + @property + def dummy_prior(self): + torch.manual_seed(0) + + model_kwargs = { + "conditioning_dim": 128, + "block_out_channels": (128, 128), + "num_attention_heads": (2, 2), + "down_num_layers_per_block": (1, 1), + "up_num_layers_per_block": (1, 1), + "clip_image_in_channels": 768, + "switch_level": (False,), + "clip_text_in_channels": self.text_embedder_hidden_size, + "clip_text_pooled_in_channels": self.text_embedder_hidden_size, + } + + model = StableCascadeUNet(**model_kwargs) + return model.eval() + + @property + def dummy_tokenizer(self): + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + return tokenizer + + @property + def dummy_text_encoder(self): + torch.manual_seed(0) + config = 
CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + projection_dim=self.text_embedder_hidden_size, + hidden_size=self.text_embedder_hidden_size, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + return CLIPTextModelWithProjection(config).eval() + + @property + def dummy_vqgan(self): + torch.manual_seed(0) + + model_kwargs = { + "bottleneck_blocks": 1, + "num_vq_embeddings": 2, + } + model = PaellaVQModel(**model_kwargs) + return model.eval() + + @property + def dummy_decoder(self): + torch.manual_seed(0) + model_kwargs = { + "in_channels": 4, + "out_channels": 4, + "conditioning_dim": 128, + "block_out_channels": (16, 32, 64, 128), + "num_attention_heads": (-1, -1, 1, 2), + "down_num_layers_per_block": (1, 1, 1, 1), + "up_num_layers_per_block": (1, 1, 1, 1), + "down_blocks_repeat_mappers": (1, 1, 1, 1), + "up_blocks_repeat_mappers": (3, 3, 2, 2), + "block_types_per_layer": ( + ("SDCascadeResBlock", "SDCascadeTimestepBlock"), + ("SDCascadeResBlock", "SDCascadeTimestepBlock"), + ("SDCascadeResBlock", "SDCascadeTimestepBlock", "SDCascadeAttnBlock"), + ("SDCascadeResBlock", "SDCascadeTimestepBlock", "SDCascadeAttnBlock"), + ), + "switch_level": None, + "clip_text_pooled_in_channels": 32, + "dropout": (0.1, 0.1, 0.1, 0.1), + } + + model = StableCascadeUNet(**model_kwargs) + return model.eval() + + def get_dummy_components(self): + prior = self.dummy_prior + + scheduler = DDPMWuerstchenScheduler() + tokenizer = self.dummy_tokenizer + text_encoder = self.dummy_text_encoder + decoder = self.dummy_decoder + vqgan = self.dummy_vqgan + prior_text_encoder = self.dummy_text_encoder + prior_tokenizer = self.dummy_tokenizer + + components = { + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "decoder": decoder, + "scheduler": scheduler, + "vqgan": vqgan, + "prior_text_encoder": prior_text_encoder, + "prior_tokenizer": prior_tokenizer, + "prior_prior": prior, + "prior_scheduler": scheduler, + "prior_feature_extractor": None, + "prior_image_encoder": None, + } + + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "horse", + "generator": generator, + "prior_guidance_scale": 4.0, + "decoder_guidance_scale": 4.0, + "num_inference_steps": 2, + "prior_num_inference_steps": 2, + "output_type": "np", + "height": 128, + "width": 128, + } + return inputs + + def test_stable_cascade(self): + device = "cpu" + + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + + pipe.set_progress_bar_config(disable=None) + + output = pipe(**self.get_dummy_inputs(device)) + image = output.images + + image_from_tuple = pipe(**self.get_dummy_inputs(device), return_dict=False)[0] + + image_slice = image[0, -3:, -3:, -1] + image_from_tuple_slice = image_from_tuple[-3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + + expected_slice = np.array([0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2, ( + f" expected_slice {expected_slice}, but got {image_slice.flatten()}" + ) + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2, ( + f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}" + ) + + @require_torch_accelerator + def test_offloads(self): + pipes = [] + components = 
self.get_dummy_components() + sd_pipe = self.pipeline_class(**components).to(torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components) + sd_pipe.enable_sequential_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components) + sd_pipe.enable_model_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + image_slices = [] + for pipe in pipes: + inputs = self.get_dummy_inputs(torch_device) + image = pipe(**inputs).images + + image_slices.append(image[0, -3:, -3:, -1].flatten()) + + assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3 + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=2e-2) + + @unittest.skip(reason="fp16 not supported") + def test_float16_inference(self): + super().test_float16_inference() + + @unittest.skip(reason="no callback test for combined pipeline") + def test_callback_inputs(self): + super().test_callback_inputs() diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py b/pythonProject/diffusers-main/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..5b3acb8705b333089f04ebb4fda8b638b8678768 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py @@ -0,0 +1,324 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
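[Editorial sketch, not part of the patch.] `test_offloads` above checks that a fully on-device pipeline, `enable_sequential_cpu_offload`, and `enable_model_cpu_offload` agree to within 1e-3 on the output slice. For reference, this is how the two offload modes are enabled on a real checkpoint (a usage sketch only; the checkpoint id is taken from the integration tests in this diff, and the prompt is illustrative):

    import torch
    from diffusers import StableCascadeCombinedPipeline

    pipe = StableCascadeCombinedPipeline.from_pretrained(
        "stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.bfloat16
    )
    # Per-model offload: each sub-model moves to the accelerator only while it runs.
    pipe.enable_model_cpu_offload()
    # Alternatively, stricter per-module offload (much slower, lowest memory):
    # pipe.enable_sequential_cpu_offload()
    image = pipe(prompt="an astronaut riding a horse", output_type="pil").images[0]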
+ +import gc +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer + +from diffusers import DDPMWuerstchenScheduler, StableCascadeDecoderPipeline +from diffusers.models import StableCascadeUNet +from diffusers.pipelines.wuerstchen import PaellaVQModel +from diffusers.utils.torch_utils import randn_tensor + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + load_numpy, + load_pt, + numpy_cosine_similarity_distance, + require_torch_accelerator, + skip_mps, + slow, + torch_device, +) +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class StableCascadeDecoderPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = StableCascadeDecoderPipeline + params = ["prompt"] + batch_params = ["image_embeddings", "prompt", "negative_prompt"] + required_optional_params = [ + "num_images_per_prompt", + "num_inference_steps", + "latents", + "negative_prompt", + "guidance_scale", + "output_type", + "return_dict", + ] + test_xformers_attention = False + callback_cfg_params = ["image_embeddings", "text_encoder_hidden_states"] + + @property + def text_embedder_hidden_size(self): + return 32 + + @property + def time_input_dim(self): + return 32 + + @property + def block_out_channels_0(self): + return self.time_input_dim + + @property + def time_embed_dim(self): + return self.time_input_dim * 4 + + @property + def dummy_tokenizer(self): + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + return tokenizer + + @property + def dummy_text_encoder(self): + torch.manual_seed(0) + config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + projection_dim=self.text_embedder_hidden_size, + hidden_size=self.text_embedder_hidden_size, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + return CLIPTextModelWithProjection(config).eval() + + @property + def dummy_vqgan(self): + torch.manual_seed(0) + + model_kwargs = { + "bottleneck_blocks": 1, + "num_vq_embeddings": 2, + } + model = PaellaVQModel(**model_kwargs) + return model.eval() + + @property + def dummy_decoder(self): + torch.manual_seed(0) + model_kwargs = { + "in_channels": 4, + "out_channels": 4, + "conditioning_dim": 128, + "block_out_channels": [16, 32, 64, 128], + "num_attention_heads": [-1, -1, 1, 2], + "down_num_layers_per_block": [1, 1, 1, 1], + "up_num_layers_per_block": [1, 1, 1, 1], + "down_blocks_repeat_mappers": [1, 1, 1, 1], + "up_blocks_repeat_mappers": [3, 3, 2, 2], + "block_types_per_layer": [ + ["SDCascadeResBlock", "SDCascadeTimestepBlock"], + ["SDCascadeResBlock", "SDCascadeTimestepBlock"], + ["SDCascadeResBlock", "SDCascadeTimestepBlock", "SDCascadeAttnBlock"], + ["SDCascadeResBlock", "SDCascadeTimestepBlock", "SDCascadeAttnBlock"], + ], + "switch_level": None, + "clip_text_pooled_in_channels": 32, + "dropout": [0.1, 0.1, 0.1, 0.1], + } + model = StableCascadeUNet(**model_kwargs) + return model.eval() + + def get_dummy_components(self): + decoder = self.dummy_decoder + text_encoder = self.dummy_text_encoder + tokenizer = self.dummy_tokenizer + vqgan = self.dummy_vqgan + + scheduler = DDPMWuerstchenScheduler() + + components = { + "decoder": decoder, + "vqgan": vqgan, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "scheduler": scheduler, + "latent_dim_scale": 4.0, + } + + return components + + def get_dummy_inputs(self, device, seed=0): 
+        if str(device).startswith("mps"):
+            generator = torch.manual_seed(seed)
+        else:
+            generator = torch.Generator(device=device).manual_seed(seed)
+        inputs = {
+            "image_embeddings": torch.ones((1, 4, 4, 4), device=device),
+            "prompt": "horse",
+            "generator": generator,
+            "guidance_scale": 2.0,
+            "num_inference_steps": 2,
+            "output_type": "np",
+        }
+        return inputs
+
+    def test_wuerstchen_decoder(self):
+        device = "cpu"
+
+        components = self.get_dummy_components()
+
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(device)
+
+        pipe.set_progress_bar_config(disable=None)
+
+        output = pipe(**self.get_dummy_inputs(device))
+        image = output.images
+
+        # return_dict=False returns a plain tuple, so unpack the images array before slicing
+        image_from_tuple = pipe(**self.get_dummy_inputs(device), return_dict=False)[0]
+
+        image_slice = image[0, -3:, -3:, -1]
+        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 64, 64, 3)
+
+        expected_slice = np.array([0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0])
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
+
+    @skip_mps
+    def test_inference_batch_single_identical(self):
+        self._test_inference_batch_single_identical(expected_max_diff=1e-2)
+
+    @skip_mps
+    def test_attention_slicing_forward_pass(self):
+        test_max_difference = torch_device == "cpu"
+        test_mean_pixel_difference = False
+
+        self._test_attention_slicing_forward_pass(
+            test_max_difference=test_max_difference,
+            test_mean_pixel_difference=test_mean_pixel_difference,
+        )
+
+    @unittest.skip(reason="fp16 not supported")
+    def test_float16_inference(self):
+        super().test_float16_inference()
+
+    def test_stable_cascade_decoder_single_prompt_multiple_image_embeddings(self):
+        device = "cpu"
+        components = self.get_dummy_components()
+
+        pipe = StableCascadeDecoderPipeline(**components)
+        pipe.set_progress_bar_config(disable=None)
+
+        prior_num_images_per_prompt = 2
+        decoder_num_images_per_prompt = 2
+        prompt = ["a cat"]
+        batch_size = len(prompt)
+
+        generator = torch.Generator(device)
+        image_embeddings = randn_tensor(
+            (batch_size * prior_num_images_per_prompt, 4, 4, 4), generator=generator.manual_seed(0)
+        )
+        decoder_output = pipe(
+            image_embeddings=image_embeddings,
+            prompt=prompt,
+            num_inference_steps=1,
+            output_type="np",
+            guidance_scale=0.0,
+            generator=generator.manual_seed(0),
+            num_images_per_prompt=decoder_num_images_per_prompt,
+        )
+
+        assert decoder_output.images.shape[0] == (
+            batch_size * prior_num_images_per_prompt * decoder_num_images_per_prompt
+        )
+
+    def test_stable_cascade_decoder_single_prompt_multiple_image_embeddings_with_guidance(self):
+        device = "cpu"
+        components = self.get_dummy_components()
+
+        pipe = StableCascadeDecoderPipeline(**components)
+        pipe.set_progress_bar_config(disable=None)
+
+        prior_num_images_per_prompt = 2
+        decoder_num_images_per_prompt = 2
+        prompt = ["a cat"]
+        batch_size = len(prompt)
+
+        generator = torch.Generator(device)
+        image_embeddings = randn_tensor(
+            (batch_size * prior_num_images_per_prompt, 4, 4, 4), generator=generator.manual_seed(0)
+        )
+        decoder_output = pipe(
+            image_embeddings=image_embeddings,
+            prompt=prompt,
+            num_inference_steps=1,
+            output_type="np",
+            guidance_scale=2.0,
+            generator=generator.manual_seed(0),
+            num_images_per_prompt=decoder_num_images_per_prompt,
+        )
+
+        assert decoder_output.images.shape[0] == (
+            batch_size * prior_num_images_per_prompt * decoder_num_images_per_prompt
+        )
+
+    def test_encode_prompt_works_in_isolation(self):
+
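+        # The shared mixin test calls encode_prompt directly, so it must be told the device,
+        # the batch size, and whether classifier-free guidance is enabled.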
extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "batch_size": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + + +@slow +@require_torch_accelerator +class StableCascadeDecoderPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_cascade_decoder(self): + pipe = StableCascadeDecoderPipeline.from_pretrained( + "stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.bfloat16 + ) + pipe.enable_model_cpu_offload(device=torch_device) + pipe.set_progress_bar_config(disable=None) + + prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background." + + generator = torch.Generator(device="cpu").manual_seed(0) + image_embedding = load_pt( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt", + map_location=torch_device, + ) + + image = pipe( + prompt=prompt, + image_embeddings=image_embedding, + output_type="np", + num_inference_steps=2, + generator=generator, + ).images[0] + + assert image.shape == (1024, 1024, 3) + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/stable_cascade_decoder_image.npy" + ) + max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten()) + assert max_diff < 2e-4 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_cascade/test_stable_cascade_prior.py b/pythonProject/diffusers-main/tests/pipelines/stable_cascade/test_stable_cascade_prior.py new file mode 100644 index 0000000000000000000000000000000000000000..f8267186db14dc785de11d65deaaf6455bff2c7a --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_cascade/test_stable_cascade_prior.py @@ -0,0 +1,284 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
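+# Fast dummy-component tests and slow hub-checkpoint tests for the Stable Cascade prior pipeline.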
+ +import gc +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer + +from diffusers import DDPMWuerstchenScheduler, StableCascadePriorPipeline +from diffusers.models import StableCascadeUNet +from diffusers.utils.import_utils import is_peft_available + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + load_numpy, + numpy_cosine_similarity_distance, + require_peft_backend, + require_torch_accelerator, + skip_mps, + slow, + torch_device, +) + + +if is_peft_available(): + from peft import LoraConfig + from peft.tuners.tuners_utils import BaseTunerLayer + +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class StableCascadePriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = StableCascadePriorPipeline + params = ["prompt"] + batch_params = ["prompt", "negative_prompt"] + required_optional_params = [ + "num_images_per_prompt", + "generator", + "num_inference_steps", + "latents", + "negative_prompt", + "guidance_scale", + "output_type", + "return_dict", + ] + test_xformers_attention = False + callback_cfg_params = ["text_encoder_hidden_states"] + + @property + def text_embedder_hidden_size(self): + return 32 + + @property + def time_input_dim(self): + return 32 + + @property + def block_out_channels_0(self): + return self.time_input_dim + + @property + def time_embed_dim(self): + return self.time_input_dim * 4 + + @property + def dummy_tokenizer(self): + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + return tokenizer + + @property + def dummy_text_encoder(self): + torch.manual_seed(0) + config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=self.text_embedder_hidden_size, + projection_dim=self.text_embedder_hidden_size, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + return CLIPTextModelWithProjection(config).eval() + + @property + def dummy_prior(self): + torch.manual_seed(0) + + model_kwargs = { + "conditioning_dim": 128, + "block_out_channels": (128, 128), + "num_attention_heads": (2, 2), + "down_num_layers_per_block": (1, 1), + "up_num_layers_per_block": (1, 1), + "switch_level": (False,), + "clip_image_in_channels": 768, + "clip_text_in_channels": self.text_embedder_hidden_size, + "clip_text_pooled_in_channels": self.text_embedder_hidden_size, + "dropout": (0.1, 0.1), + } + + model = StableCascadeUNet(**model_kwargs) + return model.eval() + + def get_dummy_components(self): + prior = self.dummy_prior + text_encoder = self.dummy_text_encoder + tokenizer = self.dummy_tokenizer + + scheduler = DDPMWuerstchenScheduler() + + components = { + "prior": prior, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "scheduler": scheduler, + "feature_extractor": None, + "image_encoder": None, + } + + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "horse", + "generator": generator, + "guidance_scale": 4.0, + "num_inference_steps": 2, + "output_type": "np", + } + return inputs + + def test_wuerstchen_prior(self): + device = "cpu" + + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + + 
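+        # Quiet the progress bar (tqdm's disable=None suppresses it outside a TTY, e.g. in CI).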
pipe.set_progress_bar_config(disable=None) + + output = pipe(**self.get_dummy_inputs(device)) + image = output.image_embeddings + + image_from_tuple = pipe(**self.get_dummy_inputs(device), return_dict=False)[0] + + image_slice = image[0, 0, 0, -10:] + + image_from_tuple_slice = image_from_tuple[0, 0, 0, -10:] + assert image.shape == (1, 16, 24, 24) + + expected_slice = np.array( + [94.5498, -21.9481, -117.5025, -192.8760, 38.0117, 73.4709, 38.1142, -185.5593, -47.7869, 167.2853] + ) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-2 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 5e-2 + + @skip_mps + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=2e-1) + + @skip_mps + def test_attention_slicing_forward_pass(self): + test_max_difference = torch_device == "cpu" + test_mean_pixel_difference = False + + self._test_attention_slicing_forward_pass( + test_max_difference=test_max_difference, + test_mean_pixel_difference=test_mean_pixel_difference, + ) + + @unittest.skip(reason="fp16 not supported") + def test_float16_inference(self): + super().test_float16_inference() + + def check_if_lora_correctly_set(self, model) -> bool: + """ + Checks if the LoRA layers are correctly set with peft + """ + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + return True + return False + + def get_lora_components(self): + prior = self.dummy_prior + + prior_lora_config = LoraConfig( + r=4, lora_alpha=4, target_modules=["to_q", "to_k", "to_v", "to_out.0"], init_lora_weights=False + ) + + return prior, prior_lora_config + + @require_peft_backend + @unittest.skip(reason="no lora support for now") + def test_inference_with_prior_lora(self): + _, prior_lora_config = self.get_lora_components() + device = "cpu" + + components = self.get_dummy_components() + + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + + pipe.set_progress_bar_config(disable=None) + + output_no_lora = pipe(**self.get_dummy_inputs(device)) + image_embed = output_no_lora.image_embeddings + self.assertTrue(image_embed.shape == (1, 16, 24, 24)) + + pipe.prior.add_adapter(prior_lora_config) + self.assertTrue(self.check_if_lora_correctly_set(pipe.prior), "Lora not correctly set in prior") + + output_lora = pipe(**self.get_dummy_inputs(device)) + lora_image_embed = output_lora.image_embeddings + + self.assertTrue(image_embed.shape == lora_image_embed.shape) + + @unittest.skip("Test not supported because dtype determination relies on text encoder.") + def test_encode_prompt_works_in_isolation(self): + pass + + +@slow +@require_torch_accelerator +class StableCascadePriorPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_cascade_prior(self): + pipe = StableCascadePriorPipeline.from_pretrained( + "stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=torch.bfloat16 + ) + pipe.enable_model_cpu_offload(device=torch_device) + pipe.set_progress_bar_config(disable=None) + + prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background." 
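+        # Seeding a CPU generator keeps the initial latents reproducible across accelerator backends.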
+ + generator = torch.Generator(device="cpu").manual_seed(0) + + output = pipe(prompt, num_inference_steps=2, output_type="np", generator=generator) + image_embedding = output.image_embeddings + expected_image_embedding = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/stable_cascade_prior_image_embeddings.npy" + ) + assert image_embedding.shape == (1, 16, 24, 24) + + max_diff = numpy_cosine_similarity_distance(image_embedding.flatten(), expected_image_embedding.flatten()) + assert max_diff < 1e-4 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..62414f3f19474a8da9b826409cffa6dacf7324b0 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py @@ -0,0 +1,376 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
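+# ONNX Runtime counterparts of the Stable Diffusion text-to-image tests; the fast tests below
+# run entirely on CPUExecutionProvider.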
+ +import tempfile +import unittest + +import numpy as np + +from diffusers import ( + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + LMSDiscreteScheduler, + OnnxStableDiffusionPipeline, + PNDMScheduler, +) + +from ...testing_utils import is_onnx_available, nightly, require_onnxruntime, require_torch_gpu +from ..test_pipelines_onnx_common import OnnxPipelineTesterMixin + + +if is_onnx_available(): + import onnxruntime as ort + + +class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): + hub_checkpoint = "hf-internal-testing/tiny-random-OnnxStableDiffusionPipeline" + + def get_dummy_inputs(self, seed=0): + generator = np.random.RandomState(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_pipeline_default_ddim(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.65072, 0.58492, 0.48219, 0.55521, 0.53180, 0.55939, 0.50697, 0.39800, 0.46455]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_pipeline_pndm(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config, skip_prk_steps=True) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.65863, 0.59425, 0.49326, 0.56313, 0.53875, 0.56627, 0.51065, 0.39777, 0.46330]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_pipeline_lms(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.53755, 0.60786, 0.47402, 0.49488, 0.51869, 0.49819, 0.47985, 0.38957, 0.44279]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_pipeline_euler(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.53755, 0.60786, 0.47402, 0.49488, 0.51869, 0.49819, 0.47985, 0.38957, 0.44279]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_pipeline_euler_ancestral(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + 
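+        # Same dummy inputs as the scheduler variants above; only the scheduler class changes.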
inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.53817, 0.60812, 0.47384, 0.49530, 0.51894, 0.49814, 0.47984, 0.38958, 0.44271]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_pipeline_dpm_multistep(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.53895, 0.60808, 0.47933, 0.49608, 0.51886, 0.49950, 0.48053, 0.38957, 0.44200]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_prompt_embeds(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + inputs["prompt"] = 3 * [inputs["prompt"]] + + # forward + output = pipe(**inputs) + image_slice_1 = output.images[0, -3:, -3:, -1] + + inputs = self.get_dummy_inputs() + prompt = 3 * [inputs.pop("prompt")] + + text_inputs = pipe.tokenizer( + prompt, + padding="max_length", + max_length=pipe.tokenizer.model_max_length, + truncation=True, + return_tensors="np", + ) + text_inputs = text_inputs["input_ids"] + + prompt_embeds = pipe.text_encoder(input_ids=text_inputs.astype(np.int32))[0] + + inputs["prompt_embeds"] = prompt_embeds + + # forward + output = pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + def test_stable_diffusion_negative_prompt_embeds(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + negative_prompt = 3 * ["this is a negative prompt"] + inputs["negative_prompt"] = negative_prompt + inputs["prompt"] = 3 * [inputs["prompt"]] + + # forward + output = pipe(**inputs) + image_slice_1 = output.images[0, -3:, -3:, -1] + + inputs = self.get_dummy_inputs() + prompt = 3 * [inputs.pop("prompt")] + + embeds = [] + for p in [prompt, negative_prompt]: + text_inputs = pipe.tokenizer( + p, + padding="max_length", + max_length=pipe.tokenizer.model_max_length, + truncation=True, + return_tensors="np", + ) + text_inputs = text_inputs["input_ids"] + + embeds.append(pipe.text_encoder(input_ids=text_inputs.astype(np.int32))[0]) + + inputs["prompt_embeds"], inputs["negative_prompt_embeds"] = embeds + + # forward + output = pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + +@nightly +@require_onnxruntime +@require_torch_gpu +class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): + @property + def gpu_provider(self): + return ( + "CUDAExecutionProvider", + { + "gpu_mem_limit": "15000000000", # 15GB + "arena_extend_strategy": "kSameAsRequested", + }, + ) + + @property + def gpu_options(self): + options = ort.SessionOptions() + options.enable_mem_pattern = False + return options + + def test_inference_default_pndm(self): + # using the PNDM scheduler by default + sd_pipe = 
OnnxStableDiffusionPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + revision="onnx", + safety_checker=None, + feature_extractor=None, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + np.random.seed(0) + output = sd_pipe([prompt], guidance_scale=6.0, num_inference_steps=10, output_type="np") + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.0452, 0.0390, 0.0087, 0.0350, 0.0617, 0.0364, 0.0544, 0.0523, 0.0720]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_inference_ddim(self): + ddim_scheduler = DDIMScheduler.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx" + ) + sd_pipe = OnnxStableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", + revision="onnx", + scheduler=ddim_scheduler, + safety_checker=None, + feature_extractor=None, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "open neural network exchange" + generator = np.random.RandomState(0) + output = sd_pipe([prompt], guidance_scale=7.5, num_inference_steps=10, generator=generator, output_type="np") + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.2867, 0.1974, 0.1481, 0.7294, 0.7251, 0.6667, 0.4194, 0.5642, 0.6486]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_inference_k_lms(self): + lms_scheduler = LMSDiscreteScheduler.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx" + ) + sd_pipe = OnnxStableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", + revision="onnx", + scheduler=lms_scheduler, + safety_checker=None, + feature_extractor=None, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "open neural network exchange" + generator = np.random.RandomState(0) + output = sd_pipe([prompt], guidance_scale=7.5, num_inference_steps=10, generator=generator, output_type="np") + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.2306, 0.1959, 0.1593, 0.6549, 0.6394, 0.5408, 0.5065, 0.6010, 0.6161]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_intermediate_state(self): + number_of_steps = 0 + + def test_callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: + test_callback_fn.has_been_called = True + nonlocal number_of_steps + number_of_steps += 1 + if step == 0: + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array( + [-0.6772, -0.3835, -1.2456, 0.1905, -1.0974, 0.6967, -1.9353, 0.0178, 1.0167] + ) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 + elif step == 5: + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array( + [-0.3351, 0.2241, -0.1837, -0.2325, -0.6577, 0.3393, -0.0241, 0.5899, 1.3875] + ) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 + + test_callback_fn.has_been_called = False + + pipe = 
OnnxStableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", + revision="onnx", + safety_checker=None, + feature_extractor=None, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + pipe.set_progress_bar_config(disable=None) + + prompt = "Andromeda galaxy in a bottle" + + generator = np.random.RandomState(0) + pipe( + prompt=prompt, + num_inference_steps=5, + guidance_scale=7.5, + generator=generator, + callback=test_callback_fn, + callback_steps=1, + ) + assert test_callback_fn.has_been_called + assert number_of_steps == 6 + + def test_stable_diffusion_no_safety_checker(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", + revision="onnx", + safety_checker=None, + feature_extractor=None, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + assert isinstance(pipe, OnnxStableDiffusionPipeline) + assert pipe.safety_checker is None + + image = pipe("example prompt", num_inference_steps=2).images[0] + assert image is not None + + # check that there's no error when saving a pipeline with one of the models being None + with tempfile.TemporaryDirectory() as tmpdirname: + pipe.save_pretrained(tmpdirname) + pipe = OnnxStableDiffusionPipeline.from_pretrained(tmpdirname) + + # sanity check that the pipeline still works + assert pipe.safety_checker is None + image = pipe("example prompt", num_inference_steps=2).images[0] + assert image is not None diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..28d1d0f37ff8cfb6613fe2a43cb95bbf1bdd35d6 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py @@ -0,0 +1,245 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
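+# ONNX Runtime counterparts of the Stable Diffusion img2img tests.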
+ +import random +import unittest + +import numpy as np + +from diffusers import ( + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + LMSDiscreteScheduler, + OnnxStableDiffusionImg2ImgPipeline, + PNDMScheduler, +) + +from ...testing_utils import ( + floats_tensor, + is_onnx_available, + load_image, + nightly, + require_onnxruntime, + require_torch_gpu, +) +from ..test_pipelines_onnx_common import OnnxPipelineTesterMixin + + +if is_onnx_available(): + import onnxruntime as ort + + +class OnnxStableDiffusionImg2ImgPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): + hub_checkpoint = "hf-internal-testing/tiny-random-OnnxStableDiffusionPipeline" + + def get_dummy_inputs(self, seed=0): + image = floats_tensor((1, 3, 128, 128), rng=random.Random(seed)) + generator = np.random.RandomState(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 3, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_pipeline_default_ddim(self): + pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.69643, 0.58484, 0.50314, 0.58760, 0.55368, 0.59643, 0.51529, 0.41217, 0.49087]) + assert np.abs(image_slice - expected_slice).max() < 1e-1 + + def test_pipeline_pndm(self): + pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config, skip_prk_steps=True) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.61737, 0.54642, 0.53183, 0.54465, 0.52742, 0.60525, 0.49969, 0.40655, 0.48154]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1 + + def test_pipeline_lms(self): + pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + # warmup pass to apply optimizations + _ = pipe(**self.get_dummy_inputs()) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.52761, 0.59977, 0.49033, 0.49619, 0.54282, 0.50311, 0.47600, 0.40918, 0.45203]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1 + + def test_pipeline_euler(self): + pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.52911, 0.60004, 0.49229, 0.49805, 0.54502, 0.50680, 0.47777, 0.41028, 0.45304]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1 + + def 
test_pipeline_euler_ancestral(self): + pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.52911, 0.60004, 0.49229, 0.49805, 0.54502, 0.50680, 0.47777, 0.41028, 0.45304]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1 + + def test_pipeline_dpm_multistep(self): + pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.65331, 0.58277, 0.48204, 0.56059, 0.53665, 0.56235, 0.50969, 0.40009, 0.46552]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1 + + +@nightly +@require_onnxruntime +@require_torch_gpu +class OnnxStableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): + @property + def gpu_provider(self): + return ( + "CUDAExecutionProvider", + { + "gpu_mem_limit": "15000000000", # 15GB + "arena_extend_strategy": "kSameAsRequested", + }, + ) + + @property + def gpu_options(self): + options = ort.SessionOptions() + options.enable_mem_pattern = False + return options + + def test_inference_default_pndm(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/img2img/sketch-mountains-input.jpg" + ) + init_image = init_image.resize((768, 512)) + # using the PNDM scheduler by default + pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + revision="onnx", + safety_checker=None, + feature_extractor=None, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + pipe.set_progress_bar_config(disable=None) + + prompt = "A fantasy landscape, trending on artstation" + + generator = np.random.RandomState(0) + output = pipe( + prompt=prompt, + image=init_image, + strength=0.75, + guidance_scale=7.5, + num_inference_steps=10, + generator=generator, + output_type="np", + ) + images = output.images + image_slice = images[0, 255:258, 383:386, -1] + + assert images.shape == (1, 512, 768, 3) + expected_slice = np.array([0.4909, 0.5059, 0.5372, 0.4623, 0.4876, 0.5049, 0.4820, 0.4956, 0.5019]) + # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues + + assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2 + + def test_inference_k_lms(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/img2img/sketch-mountains-input.jpg" + ) + init_image = init_image.resize((768, 512)) + lms_scheduler = LMSDiscreteScheduler.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx" + ) + pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", + revision="onnx", + scheduler=lms_scheduler, + safety_checker=None, + feature_extractor=None, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + 
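+        # gpu_provider and gpu_options (defined above) pin execution to CUDA with a fixed
+        # 15GB memory arena and memory-pattern optimization disabled.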
pipe.set_progress_bar_config(disable=None) + + prompt = "A fantasy landscape, trending on artstation" + + generator = np.random.RandomState(0) + output = pipe( + prompt=prompt, + image=init_image, + strength=0.75, + guidance_scale=7.5, + num_inference_steps=20, + generator=generator, + output_type="np", + ) + images = output.images + image_slice = images[0, 255:258, 383:386, -1] + + assert images.shape == (1, 512, 768, 3) + expected_slice = np.array([0.8043, 0.926, 0.9581, 0.8119, 0.8954, 0.913, 0.7209, 0.7463, 0.7431]) + # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues + + assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..1d46ff9a2f5f87d9dac90c48dd8ee7b84b7367fb --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py @@ -0,0 +1,141 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +from diffusers import LMSDiscreteScheduler, OnnxStableDiffusionInpaintPipeline + +from ...testing_utils import ( + is_onnx_available, + load_image, + nightly, + require_onnxruntime, + require_torch_gpu, +) +from ..test_pipelines_onnx_common import OnnxPipelineTesterMixin + + +if is_onnx_available(): + import onnxruntime as ort + + +class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): + # FIXME: add fast tests + pass + + +@nightly +@require_onnxruntime +@require_torch_gpu +class OnnxStableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): + @property + def gpu_provider(self): + return ( + "CUDAExecutionProvider", + { + "gpu_mem_limit": "15000000000", # 15GB + "arena_extend_strategy": "kSameAsRequested", + }, + ) + + @property + def gpu_options(self): + options = ort.SessionOptions() + options.enable_mem_pattern = False + return options + + def test_inference_default_pndm(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" + ) + pipe = OnnxStableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", + revision="onnx", + safety_checker=None, + feature_extractor=None, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + pipe.set_progress_bar_config(disable=None) + + prompt = "A red cat sitting on a park bench" + + generator = np.random.RandomState(0) + output = pipe( + prompt=prompt, + image=init_image, + mask_image=mask_image, + guidance_scale=7.5, + 
num_inference_steps=10, + generator=generator, + output_type="np", + ) + images = output.images + image_slice = images[0, 255:258, 255:258, -1] + + assert images.shape == (1, 512, 512, 3) + expected_slice = np.array([0.2514, 0.3007, 0.3517, 0.1790, 0.2382, 0.3167, 0.1944, 0.2273, 0.2464]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_inference_k_lms(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" + ) + lms_scheduler = LMSDiscreteScheduler.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", subfolder="scheduler", revision="onnx" + ) + pipe = OnnxStableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", + revision="onnx", + scheduler=lms_scheduler, + safety_checker=None, + feature_extractor=None, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + pipe.set_progress_bar_config(disable=None) + + prompt = "A red cat sitting on a park bench" + + generator = np.random.RandomState(0) + output = pipe( + prompt=prompt, + image=init_image, + mask_image=mask_image, + guidance_scale=7.5, + num_inference_steps=20, + generator=generator, + output_type="np", + ) + images = output.images + image_slice = images[0, 255:258, 255:258, -1] + + assert images.shape == (1, 512, 512, 3) + expected_slice = np.array([0.0086, 0.0077, 0.0083, 0.0093, 0.0107, 0.0139, 0.0094, 0.0097, 0.0125]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_upscale.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_upscale.py new file mode 100644 index 0000000000000000000000000000000000000000..55d9d38d64bd84573ecb571739a8eabe6f9b6951 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_upscale.py @@ -0,0 +1,231 @@ +# coding=utf-8 +# Copyright 2022 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import unittest + +import numpy as np + +from diffusers import ( + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + LMSDiscreteScheduler, + OnnxStableDiffusionUpscalePipeline, + PNDMScheduler, +) + +from ...testing_utils import ( + floats_tensor, + is_onnx_available, + load_image, + nightly, + require_onnxruntime, + require_torch_gpu, +) +from ..test_pipelines_onnx_common import OnnxPipelineTesterMixin + + +if is_onnx_available(): + import onnxruntime as ort + + +# TODO: (Dhruv) Update hub_checkpoint repo_id +@unittest.skip( + "There is a potential backdoor vulnerability in the hub_checkpoint. 
Skip running this test until resolved" +) +class OnnxStableDiffusionUpscalePipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): + # TODO: is there an appropriate internal test set? + hub_checkpoint = "ssube/stable-diffusion-x4-upscaler-onnx" + + def get_dummy_inputs(self, seed=0): + image = floats_tensor((1, 3, 128, 128), rng=random.Random(seed)) + generator = np.random.RandomState(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_pipeline_default_ddpm(self): + pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + # started as 128, should now be 512 + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.6957, 0.7002, 0.7186, 0.6881, 0.6693, 0.6910, 0.7445, 0.7274, 0.7056]) + assert np.abs(image_slice - expected_slice).max() < 1e-1 + + def test_pipeline_pndm(self): + pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config, skip_prk_steps=True) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.7349, 0.7347, 0.7034, 0.7696, 0.7876, 0.7597, 0.7916, 0.8085, 0.8036]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1 + + def test_pipeline_dpm_multistep(self): + pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array( + [0.7659278, 0.76437664, 0.75579107, 0.7691116, 0.77666986, 0.7727672, 0.7758664, 0.7812226, 0.76942515] + ) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1 + + def test_pipeline_euler(self): + pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array( + [0.6974782, 0.68902093, 0.70135885, 0.7583618, 0.7804545, 0.7854912, 0.78667426, 0.78743863, 0.78070223] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1 + + def test_pipeline_euler_ancestral(self): + pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider") + pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array( + [0.77424496, 0.773601, 
0.7645288, 0.7769598, 0.7772739, 0.7738688, 0.78187233, 0.77879584, 0.767043]
+        )
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
+
+
+@nightly
+@require_onnxruntime
+@require_torch_gpu
+class OnnxStableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
+    @property
+    def gpu_provider(self):
+        return (
+            "CUDAExecutionProvider",
+            {
+                "gpu_mem_limit": "15000000000",  # 15GB
+                "arena_extend_strategy": "kSameAsRequested",
+            },
+        )
+
+    @property
+    def gpu_options(self):
+        options = ort.SessionOptions()
+        options.enable_mem_pattern = False
+        return options
+
+    def test_inference_default_ddpm(self):
+        init_image = load_image(
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
+            "/img2img/sketch-mountains-input.jpg"
+        )
+        init_image = init_image.resize((128, 128))
+        # using the checkpoint's default scheduler
+        pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(
+            "ssube/stable-diffusion-x4-upscaler-onnx",
+            provider=self.gpu_provider,
+            sess_options=self.gpu_options,
+        )
+        pipe.set_progress_bar_config(disable=None)
+
+        prompt = "A fantasy landscape, trending on artstation"
+
+        generator = np.random.RandomState(0)
+        output = pipe(
+            prompt=prompt,
+            image=init_image,
+            guidance_scale=7.5,
+            num_inference_steps=10,
+            generator=generator,
+            output_type="np",
+        )
+        images = output.images
+        image_slice = images[0, 255:258, 383:386, -1]
+
+        assert images.shape == (1, 512, 512, 3)
+        expected_slice = np.array([0.4883, 0.4947, 0.4980, 0.4975, 0.4982, 0.4980, 0.5000, 0.5006, 0.4972])
+        # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2
+
+    def test_inference_k_lms(self):
+        init_image = load_image(
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
+            "/img2img/sketch-mountains-input.jpg"
+        )
+        init_image = init_image.resize((128, 128))
+        lms_scheduler = LMSDiscreteScheduler.from_pretrained(
+            "ssube/stable-diffusion-x4-upscaler-onnx", subfolder="scheduler"
+        )
+        pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(
+            "ssube/stable-diffusion-x4-upscaler-onnx",
+            scheduler=lms_scheduler,
+            provider=self.gpu_provider,
+            sess_options=self.gpu_options,
+        )
+        pipe.set_progress_bar_config(disable=None)
+
+        prompt = "A fantasy landscape, trending on artstation"
+
+        generator = np.random.RandomState(0)
+        output = pipe(
+            prompt=prompt,
+            image=init_image,
+            guidance_scale=7.5,
+            num_inference_steps=20,
+            generator=generator,
+            output_type="np",
+        )
+        images = output.images
+        image_slice = images[0, 255:258, 383:386, -1]
+
+        assert images.shape == (1, 512, 512, 3)
+        expected_slice = np.array(
+            [0.50173753, 0.50223356, 0.502039, 0.50233036, 0.5023725, 0.5022601, 0.5018758, 0.50234085, 0.50241566]
+        )
+        # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2
diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9d9525b2e45c8b4e99ac98177c3f1d6b6347ce5
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -0,0 +1,1453 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import gc +import tempfile +import time +import unittest + +import numpy as np +import torch +from huggingface_hub import hf_hub_download +from transformers import ( + CLIPTextConfig, + CLIPTextModel, + CLIPTokenizer, +) + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + LCMScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusionPipeline, + UNet2DConditionModel, + logging, +) + +from ...testing_utils import ( + CaptureLogger, + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + load_numpy, + nightly, + numpy_cosine_similarity_distance, + require_accelerate_version_greater, + require_torch_accelerator, + require_torch_multi_accelerator, + skip_mps, + slow, + torch_device, +) +from ..pipeline_params import ( + TEXT_TO_IMAGE_BATCH_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, + TEXT_TO_IMAGE_IMAGE_PARAMS, + TEXT_TO_IMAGE_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionPipelineFastTests( + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionPipeline + params = TEXT_TO_IMAGE_PARAMS + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self, time_cond_proj_dim=None): + cross_attention_dim = 8 + + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(4, 8), + layers_per_block=1, + sample_size=32, + time_cond_proj_dim=time_cond_proj_dim, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=cross_attention_dim, + norm_num_groups=2, + ) + scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[4, 8], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + norm_num_groups=2, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=cross_attention_dim, + intermediate_size=16, + layer_norm_eps=1e-05, + num_attention_heads=2, + num_hidden_layers=2, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + 
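+        # hf-internal-testing/tiny-random-clip is a tiny fixture checkpoint, keeping downloads small.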
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_ddim(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.1763, 0.4776, 0.4986, 0.2566, 0.3802, 0.4596, 0.5363, 0.3277, 0.3949]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_lcm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.2368, 0.4900, 0.5019, 0.2723, 0.4473, 0.4578, 0.4551, 0.3532, 0.4133]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_lcm_custom_timesteps(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["num_inference_steps"] + inputs["timesteps"] = [999, 499] + output = sd_pipe(**inputs) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.2368, 0.4900, 0.5019, 0.2723, 0.4473, 0.4578, 0.4551, 0.3532, 0.4133]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_ays(self): + from diffusers.schedulers import AysSchedules + + timestep_schedule = AysSchedules["StableDiffusionTimesteps"] + sigma_schedule = AysSchedules["StableDiffusionSigmas"] + + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe.scheduler = EulerDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) 
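+        # Baseline run with a plain step count; the two runs below replace it with explicit
+        # AYS timestep and sigma schedules and are expected to change the output.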
+ inputs["num_inference_steps"] = 10 + output = sd_pipe(**inputs).images + + inputs = self.get_dummy_inputs(device) + inputs["num_inference_steps"] = None + inputs["timesteps"] = timestep_schedule + output_ts = sd_pipe(**inputs).images + + inputs = self.get_dummy_inputs(device) + inputs["num_inference_steps"] = None + inputs["sigmas"] = sigma_schedule + output_sigmas = sd_pipe(**inputs).images + + assert np.abs(output_sigmas.flatten() - output_ts.flatten()).max() < 1e-3, ( + "ays timesteps and ays sigmas should have the same outputs" + ) + assert np.abs(output.flatten() - output_ts.flatten()).max() > 1e-3, ( + "use ays timesteps should have different outputs" + ) + assert np.abs(output.flatten() - output_sigmas.flatten()).max() > 1e-3, ( + "use ays sigmas should have different outputs" + ) + + def test_stable_diffusion_prompt_embeds(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + inputs["prompt"] = 3 * [inputs["prompt"]] + + # forward + output = sd_pipe(**inputs) + image_slice_1 = output.images[0, -3:, -3:, -1] + + inputs = self.get_dummy_inputs(torch_device) + prompt = 3 * [inputs.pop("prompt")] + + text_inputs = sd_pipe.tokenizer( + prompt, + padding="max_length", + max_length=sd_pipe.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ) + text_inputs = text_inputs["input_ids"].to(torch_device) + + prompt_embeds = sd_pipe.text_encoder(text_inputs)[0] + + inputs["prompt_embeds"] = prompt_embeds + + # forward + output = sd_pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + def test_stable_diffusion_negative_prompt_embeds(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + negative_prompt = 3 * ["this is a negative prompt"] + inputs["negative_prompt"] = negative_prompt + inputs["prompt"] = 3 * [inputs["prompt"]] + + # forward + output = sd_pipe(**inputs) + image_slice_1 = output.images[0, -3:, -3:, -1] + + inputs = self.get_dummy_inputs(torch_device) + prompt = 3 * [inputs.pop("prompt")] + + embeds = [] + for p in [prompt, negative_prompt]: + text_inputs = sd_pipe.tokenizer( + p, + padding="max_length", + max_length=sd_pipe.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ) + text_inputs = text_inputs["input_ids"].to(torch_device) + + embeds.append(sd_pipe.text_encoder(text_inputs)[0]) + + inputs["prompt_embeds"], inputs["negative_prompt_embeds"] = embeds + + # forward + output = sd_pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + def test_stable_diffusion_ddim_factor_8(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs, height=136, width=136) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 
136, 136, 3) + expected_slice = np.array([0.4720, 0.5426, 0.5160, 0.3961, 0.4696, 0.4296, 0.5738, 0.5888, 0.5481]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_pndm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe.scheduler = PNDMScheduler(skip_prk_steps=True) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.1941, 0.4748, 0.4880, 0.2222, 0.4221, 0.4545, 0.5604, 0.3488, 0.3902]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_no_safety_checker(self): + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-lms-pipe", safety_checker=None + ) + assert isinstance(pipe, StableDiffusionPipeline) + assert isinstance(pipe.scheduler, LMSDiscreteScheduler) + assert pipe.safety_checker is None + + image = pipe("example prompt", num_inference_steps=2).images[0] + assert image is not None + + # check that there's no error when saving a pipeline with one of the models being None + with tempfile.TemporaryDirectory() as tmpdirname: + pipe.save_pretrained(tmpdirname) + pipe = StableDiffusionPipeline.from_pretrained(tmpdirname) + + # sanity check that the pipeline still works + assert pipe.safety_checker is None + image = pipe("example prompt", num_inference_steps=2).images[0] + assert image is not None + + def test_stable_diffusion_k_lms(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.2681, 0.4785, 0.4857, 0.2426, 0.4473, 0.4481, 0.5610, 0.3676, 0.3855]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_k_euler_ancestral(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.2682, 0.4782, 0.4855, 0.2424, 0.4472, 0.4479, 0.5612, 0.3676, 0.3854]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_k_euler(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe.scheduler = EulerDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + 
sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.2681, 0.4785, 0.4857, 0.2426, 0.4473, 0.4481, 0.5610, 0.3676, 0.3855]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_vae_slicing(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + components["scheduler"] = LMSDiscreteScheduler.from_config(components["scheduler"].config) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + image_count = 4 + + inputs = self.get_dummy_inputs(device) + inputs["prompt"] = [inputs["prompt"]] * image_count + output_1 = sd_pipe(**inputs) + + # make sure sliced vae decode yields the same result + sd_pipe.enable_vae_slicing() + inputs = self.get_dummy_inputs(device) + inputs["prompt"] = [inputs["prompt"]] * image_count + output_2 = sd_pipe(**inputs) + + # there is a small discrepancy at image borders vs. full batch decode + assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 3e-3 + + def test_stable_diffusion_vae_tiling(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + # the safety checker is not needed for this test + components["safety_checker"] = None + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + + # baseline: regular (non-tiled) VAE decode + generator = torch.Generator(device=device).manual_seed(0) + output_1 = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np") + + # make sure tiled vae decode yields the same result + sd_pipe.enable_vae_tiling() + generator = torch.Generator(device=device).manual_seed(0) + output_2 = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np") + + assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 5e-1 + + # test that tiled decode works with various shapes + shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)] + for shape in shapes: + zeros = torch.zeros(shape).to(device) + sd_pipe.vae.decode(zeros) + + def test_stable_diffusion_negative_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + components["scheduler"] = PNDMScheduler(skip_prk_steps=True) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + negative_prompt = "french fries" + output = sd_pipe(**inputs, negative_prompt=negative_prompt) + + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.1907, 0.4709, 0.4858, 0.2224, 0.4223, 0.4539, 0.5606, 0.3489, 0.3900]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_long_prompt(self): + components = self.get_dummy_components() + components["scheduler"] = LMSDiscreteScheduler.from_config(components["scheduler"].config) +
sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + do_classifier_free_guidance = True + negative_prompt = None + num_images_per_prompt = 1 + logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion") + logger.setLevel(logging.WARNING) + + prompt = 100 * "@" + with CaptureLogger(logger) as cap_logger: + negative_text_embeddings, text_embeddings = sd_pipe.encode_prompt( + prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + if negative_text_embeddings is not None: + text_embeddings = torch.cat([negative_text_embeddings, text_embeddings]) + + # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25 + assert cap_logger.out.count("@") == 25 + + negative_prompt = "Hello" + with CaptureLogger(logger) as cap_logger_2: + negative_text_embeddings_2, text_embeddings_2 = sd_pipe.encode_prompt( + prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + if negative_text_embeddings_2 is not None: + text_embeddings_2 = torch.cat([negative_text_embeddings_2, text_embeddings_2]) + + assert cap_logger.out == cap_logger_2.out + + prompt = 25 * "@" + with CaptureLogger(logger) as cap_logger_3: + negative_text_embeddings_3, text_embeddings_3 = sd_pipe.encode_prompt( + prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + if negative_text_embeddings_3 is not None: + text_embeddings_3 = torch.cat([negative_text_embeddings_3, text_embeddings_3]) + + assert text_embeddings_3.shape == text_embeddings_2.shape == text_embeddings.shape + assert text_embeddings.shape[1] == 77 + assert cap_logger_3.out == "" + + def test_stable_diffusion_height_width_opt(self): + components = self.get_dummy_components() + components["scheduler"] = LMSDiscreteScheduler.from_config(components["scheduler"].config) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "hey" + + output = sd_pipe(prompt, num_inference_steps=1, output_type="np") + image_shape = output.images[0].shape[:2] + assert image_shape == (64, 64) + + output = sd_pipe(prompt, num_inference_steps=1, height=96, width=96, output_type="np") + image_shape = output.images[0].shape[:2] + assert image_shape == (96, 96) + + config = dict(sd_pipe.unet.config) + config["sample_size"] = 96 + sd_pipe.unet = UNet2DConditionModel.from_config(config).to(torch_device) + output = sd_pipe(prompt, num_inference_steps=1, output_type="np") + image_shape = output.images[0].shape[:2] + assert image_shape == (192, 192) + + def test_attention_slicing_forward_pass(self): + super().test_attention_slicing_forward_pass(expected_max_diff=3e-3) + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + # MPS currently doesn't support ComplexFloats, which are required for freeU - see https://github.com/huggingface/diffusers/issues/7569. 
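+ # FreeU rescales the UNet's backbone features (b1, b2) and skip-connection features (s1, s2) at inference time, with no retraining.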
+ @skip_mps + def test_freeu_enabled(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "hey" + output = sd_pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images + + sd_pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4) + output_freeu = sd_pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images + + assert not np.allclose(output[0, -3:, -3:, -1], output_freeu[0, -3:, -3:, -1]), ( + "Enabling of FreeU should lead to different results." + ) + + def test_freeu_disabled(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "hey" + output = sd_pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images + + sd_pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4) + sd_pipe.disable_freeu() + + freeu_keys = {"s1", "s2", "b1", "b2"} + for upsample_block in sd_pipe.unet.up_blocks: + for key in freeu_keys: + assert getattr(upsample_block, key) is None, f"Disabling of FreeU should have set {key} to None." + + output_no_freeu = sd_pipe( + prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0) + ).images + + assert np.allclose(output[0, -3:, -3:, -1], output_no_freeu[0, -3:, -3:, -1]), ( + "Disabling of FreeU should lead to results similar to the default pipeline results." + ) + + def test_fused_qkv_projections(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + original_image_slice = image[0, -3:, -3:, -1] + + sd_pipe.fuse_qkv_projections() + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice_fused = image[0, -3:, -3:, -1] + + sd_pipe.unfuse_qkv_projections() + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice_disabled = image[0, -3:, -3:, -1] + + assert np.allclose(original_image_slice, image_slice_fused, atol=1e-2, rtol=1e-2), ( + "Fusion of QKV projections shouldn't affect the outputs." + ) + assert np.allclose(image_slice_fused, image_slice_disabled, atol=1e-2, rtol=1e-2), ( + "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled." + ) + assert np.allclose(original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2), ( + "Original outputs should match when fused QKV projections are disabled." 
+ ) + + def test_pipeline_interrupt(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "hey" + num_inference_steps = 3 + + # store intermediate latents from the generation process + class PipelineState: + def __init__(self): + self.state = [] + + def apply(self, pipe, i, t, callback_kwargs): + self.state.append(callback_kwargs["latents"]) + return callback_kwargs + + pipe_state = PipelineState() + sd_pipe( + prompt, + num_inference_steps=num_inference_steps, + output_type="np", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=pipe_state.apply, + ).images + + # interrupt generation at step index + interrupt_step_idx = 1 + + def callback_on_step_end(pipe, i, t, callback_kwargs): + if i == interrupt_step_idx: + pipe._interrupt = True + + return callback_kwargs + + output_interrupted = sd_pipe( + prompt, + num_inference_steps=num_inference_steps, + output_type="latent", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=callback_on_step_end, + ).images + + # fetch intermediate latents at the interrupted step + # from the completed generation process + intermediate_latent = pipe_state.state[interrupt_step_idx] + + # compare the intermediate latent to the output of the interrupted process + # they should be the same + assert torch.allclose(intermediate_latent, output_interrupted, atol=1e-4) + + def test_pipeline_accept_tuple_type_unet_sample_size(self): + # the purpose of this test is to see whether the pipeline would accept a unet with the tuple-typed sample size + sd_repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + sample_size = [60, 80] + customised_unet = UNet2DConditionModel(sample_size=sample_size) + pipe = StableDiffusionPipeline.from_pretrained(sd_repo_id, unet=customised_unet) + assert pipe.unet.config.sample_size == sample_size + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + + +@slow +@require_torch_accelerator +class StableDiffusionPipelineSlowTests(unittest.TestCase): + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64)) + latents = torch.from_numpy(latents).to(device=device, dtype=dtype) + inputs = { + "prompt": "a photograph of an astronaut riding a horse", + "latents": latents, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_1_1_pndm(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-1") + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.4363, 0.4355, 0.3667, 0.4066, 0.3970, 0.3866, 0.4394, 0.4356, 0.4059]) + assert np.abs(image_slice - expected_slice).max() < 3e-3 + + def 
test_stable_diffusion_v1_4_with_freeu(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + inputs["num_inference_steps"] = 25 + + sd_pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4) + image = sd_pipe(**inputs).images + image = image[0, -3:, -3:, -1].flatten() + expected_image = [0.0721, 0.0588, 0.0268, 0.0384, 0.0636, 0.0, 0.0429, 0.0344, 0.0309] + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_stable_diffusion_1_4_pndm(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4") + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.5740, 0.4784, 0.3162, 0.6358, 0.5831, 0.5505, 0.5082, 0.5631, 0.5575]) + assert np.abs(image_slice - expected_slice).max() < 3e-3 + + def test_stable_diffusion_ddim(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None) + sd_pipe.scheduler = DDIMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.38019, 0.28647, 0.27321, 0.40377, 0.38290, 0.35446, 0.39218, 0.38165, 0.42239]) + assert np.abs(image_slice - expected_slice).max() < 1e-4 + + def test_stable_diffusion_lms(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None) + sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.10542, 0.09620, 0.07332, 0.09015, 0.09382, 0.07597, 0.08496, 0.07806, 0.06455]) + assert np.abs(image_slice - expected_slice).max() < 3e-3 + + def test_stable_diffusion_dpm(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None) + sd_pipe.scheduler = DPMSolverMultistepScheduler.from_config( + sd_pipe.scheduler.config, + final_sigmas_type="sigma_min", + ) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.03503, 0.03494, 0.01087, 0.03128, 0.02552, 0.00803, 0.00742, 0.00372, 0.00000]) + assert np.abs(image_slice - expected_slice).max() < 3e-3 + + def test_stable_diffusion_attention_slicing(self): + backend_reset_peak_memory_stats(torch_device) + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16) + pipe.unet.set_default_attn_processor() + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + # enable attention slicing + pipe.enable_attention_slicing() + inputs = 
self.get_inputs(torch_device, dtype=torch.float16) + image_sliced = pipe(**inputs).images + + mem_bytes = backend_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + # make sure that less than 3.75 GB is allocated + assert mem_bytes < 3.75 * 10**9 + + # disable slicing + pipe.disable_attention_slicing() + pipe.unet.set_default_attn_processor() + inputs = self.get_inputs(torch_device, dtype=torch.float16) + image = pipe(**inputs).images + + # make sure that more than 3.75 GB is allocated + mem_bytes = backend_max_memory_allocated(torch_device) + assert mem_bytes > 3.75 * 10**9 + max_diff = numpy_cosine_similarity_distance(image_sliced.flatten(), image.flatten()) + assert max_diff < 1e-3 + + def test_stable_diffusion_vae_slicing(self): + backend_reset_peak_memory_stats(torch_device) + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + # enable vae slicing + pipe.enable_vae_slicing() + inputs = self.get_inputs(torch_device, dtype=torch.float16) + inputs["prompt"] = [inputs["prompt"]] * 4 + inputs["latents"] = torch.cat([inputs["latents"]] * 4) + image_sliced = pipe(**inputs).images + + mem_bytes = backend_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + # make sure that less than 4 GB is allocated + assert mem_bytes < 4e9 + + # disable vae slicing + pipe.disable_vae_slicing() + inputs = self.get_inputs(torch_device, dtype=torch.float16) + inputs["prompt"] = [inputs["prompt"]] * 4 + inputs["latents"] = torch.cat([inputs["latents"]] * 4) + image = pipe(**inputs).images + + # make sure that more than 4 GB is allocated + mem_bytes = backend_max_memory_allocated(torch_device) + assert mem_bytes > 4e9 + # There is a small discrepancy at the image borders vs. a fully batched version. 
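+ # cosine-similarity distance is used instead of a max absolute difference so that isolated border pixels do not dominate the comparison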
+ max_diff = numpy_cosine_similarity_distance(image_sliced.flatten(), image.flatten()) + assert max_diff < 1e-2 + + def test_stable_diffusion_vae_tiling(self): + backend_reset_peak_memory_stats(torch_device) + model_id = "CompVis/stable-diffusion-v1-4" + pipe = StableDiffusionPipeline.from_pretrained( + model_id, variant="fp16", torch_dtype=torch.float16, safety_checker=None + ) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + pipe.unet = pipe.unet.to(memory_format=torch.channels_last) + pipe.vae = pipe.vae.to(memory_format=torch.channels_last) + + prompt = "a photograph of an astronaut riding a horse" + + # enable vae tiling + pipe.enable_vae_tiling() + pipe.enable_model_cpu_offload(device=torch_device) + generator = torch.Generator(device="cpu").manual_seed(0) + output_chunked = pipe( + [prompt], + width=1024, + height=1024, + generator=generator, + guidance_scale=7.5, + num_inference_steps=2, + output_type="np", + ) + image_chunked = output_chunked.images + + mem_bytes = backend_max_memory_allocated(torch_device) + + # disable vae tiling + pipe.disable_vae_tiling() + generator = torch.Generator(device="cpu").manual_seed(0) + output = pipe( + [prompt], + width=1024, + height=1024, + generator=generator, + guidance_scale=7.5, + num_inference_steps=2, + output_type="np", + ) + image = output.images + + assert mem_bytes < 1e10 + max_diff = numpy_cosine_similarity_distance(image_chunked.flatten(), image.flatten()) + assert max_diff < 1e-2 + + def test_stable_diffusion_fp16_vs_autocast(self): + # this test makes sure that the original model with autocast + # and the new model with fp16 yield the same result + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + image_fp16 = pipe(**inputs).images + + with torch.autocast(torch_device): + inputs = self.get_inputs(torch_device) + image_autocast = pipe(**inputs).images + + # Make sure results are close enough + diff = np.abs(image_fp16.flatten() - image_autocast.flatten()) + # They ARE different since ops are not run always at the same precision + # however, they should be extremely close. 
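+ # compare the mean difference: autocast may dispatch individual ops at different precisions, so a max-based check would be too strict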
+ assert diff.mean() < 2e-2 + + def test_stable_diffusion_intermediate_state(self): + number_of_steps = 0 + + def callback_fn(step: int, timestep: int, latents: torch.Tensor) -> None: + callback_fn.has_been_called = True + nonlocal number_of_steps + number_of_steps += 1 + if step == 1: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array( + [-0.5693, -0.3018, -0.9746, 0.0518, -0.8770, 0.7559, -1.7402, 0.1022, 1.1582] + ) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2 + elif step == 2: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array( + [-0.1958, -0.2993, -1.0166, -0.5005, -0.4810, 0.6162, -0.9492, 0.6621, 1.4492] + ) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2 + + callback_fn.has_been_called = False + + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + pipe(**inputs, callback=callback_fn, callback_steps=1) + assert callback_fn.has_been_called + assert number_of_steps == inputs["num_inference_steps"] + + def test_stable_diffusion_low_cpu_mem_usage(self): + pipeline_id = "CompVis/stable-diffusion-v1-4" + + start_time = time.time() + pipeline_low_cpu_mem_usage = StableDiffusionPipeline.from_pretrained(pipeline_id, torch_dtype=torch.float16) + pipeline_low_cpu_mem_usage.to(torch_device) + low_cpu_mem_usage_time = time.time() - start_time + + start_time = time.time() + _ = StableDiffusionPipeline.from_pretrained(pipeline_id, torch_dtype=torch.float16, low_cpu_mem_usage=False) + normal_load_time = time.time() - start_time + + assert 2 * low_cpu_mem_usage_time < normal_load_time + + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + pipe.enable_sequential_cpu_offload(device=torch_device) + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + _ = pipe(**inputs) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 2.8 GB is allocated + assert mem_bytes < 2.8 * 10**9 + + def test_stable_diffusion_pipeline_with_model_offloading(self): + backend_empty_cache(torch_device) + backend_reset_peak_memory_stats(torch_device) + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + + # Normal inference + + pipe = StableDiffusionPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + torch_dtype=torch.float16, + ) + pipe.unet.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + outputs = pipe(**inputs) + mem_bytes = backend_max_memory_allocated(torch_device) + + # With model offloading + + # Reload but don't move to cuda + pipe = StableDiffusionPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + torch_dtype=torch.float16, + ) + pipe.unet.set_default_attn_processor() + + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + 
backend_reset_peak_memory_stats(torch_device) + + pipe.enable_model_cpu_offload(device=torch_device) + pipe.set_progress_bar_config(disable=None) + inputs = self.get_inputs(torch_device, dtype=torch.float16) + + outputs_offloaded = pipe(**inputs) + mem_bytes_offloaded = backend_max_memory_allocated(torch_device) + + images = outputs.images + offloaded_images = outputs_offloaded.images + + max_diff = numpy_cosine_similarity_distance(images.flatten(), offloaded_images.flatten()) + assert max_diff < 1e-3 + assert mem_bytes_offloaded < mem_bytes + assert mem_bytes_offloaded < 3.5 * 10**9 + for module in pipe.text_encoder, pipe.unet, pipe.vae: + assert module.device == torch.device("cpu") + + # With attention slicing + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipe.enable_attention_slicing() + _ = pipe(**inputs) + mem_bytes_slicing = backend_max_memory_allocated(torch_device) + + assert mem_bytes_slicing < mem_bytes_offloaded + assert mem_bytes_slicing < 3 * 10**9 + + def test_stable_diffusion_textual_inversion(self): + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4") + pipe.load_textual_inversion("sd-concepts-library/low-poly-hd-logos-icons") + + a111_file = hf_hub_download("hf-internal-testing/text_inv_embedding_a1111_format", "winter_style.pt") + a111_file_neg = hf_hub_download( + "hf-internal-testing/text_inv_embedding_a1111_format", "winter_style_negative.pt" + ) + pipe.load_textual_inversion(a111_file) + pipe.load_textual_inversion(a111_file_neg) + pipe.to(torch_device) + + generator = torch.Generator(device="cpu").manual_seed(1) + + prompt = "An logo of a turtle in strong Style-Winter with " + neg_prompt = "Style-Winter-neg" + + image = pipe(prompt=prompt, negative_prompt=neg_prompt, generator=generator, output_type="np").images[0] + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text_inv/winter_logo_style.npy" + ) + + max_diff = np.abs(expected_image - image).max() + assert max_diff < 8e-1 + + def test_stable_diffusion_textual_inversion_with_model_cpu_offload(self): + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4") + pipe.enable_model_cpu_offload(device=torch_device) + pipe.load_textual_inversion("sd-concepts-library/low-poly-hd-logos-icons") + + a111_file = hf_hub_download("hf-internal-testing/text_inv_embedding_a1111_format", "winter_style.pt") + a111_file_neg = hf_hub_download( + "hf-internal-testing/text_inv_embedding_a1111_format", "winter_style_negative.pt" + ) + pipe.load_textual_inversion(a111_file) + pipe.load_textual_inversion(a111_file_neg) + + generator = torch.Generator(device="cpu").manual_seed(1) + + prompt = "An logo of a turtle in strong Style-Winter with " + neg_prompt = "Style-Winter-neg" + + image = pipe(prompt=prompt, negative_prompt=neg_prompt, generator=generator, output_type="np").images[0] + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text_inv/winter_logo_style.npy" + ) + + max_diff = np.abs(expected_image - image).max() + assert max_diff < 8e-1 + + def test_stable_diffusion_textual_inversion_with_sequential_cpu_offload(self): + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4") + pipe.enable_sequential_cpu_offload(device=torch_device) + # load_textual_inversion() updates the pipeline in place and returns None + pipe.load_textual_inversion("sd-concepts-library/low-poly-hd-logos-icons") + + a111_file = 
hf_hub_download("hf-internal-testing/text_inv_embedding_a1111_format", "winter_style.pt") + a111_file_neg = hf_hub_download( + "hf-internal-testing/text_inv_embedding_a1111_format", "winter_style_negative.pt" + ) + pipe.load_textual_inversion(a111_file) + pipe.load_textual_inversion(a111_file_neg) + + generator = torch.Generator(device="cpu").manual_seed(1) + + prompt = "An logo of a turtle in strong Style-Winter with " + neg_prompt = "Style-Winter-neg" + + image = pipe(prompt=prompt, negative_prompt=neg_prompt, generator=generator, output_type="np").images[0] + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text_inv/winter_logo_style.npy" + ) + + max_diff = np.abs(expected_image - image).max() + assert max_diff < 8e-1 + + +@slow +@require_torch_accelerator +class StableDiffusionPipelineCkptTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_download_from_hub(self): + ckpt_paths = [ + "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors", + "https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors", + ] + + for ckpt_path in ckpt_paths: + pipe = StableDiffusionPipeline.from_single_file(ckpt_path, torch_dtype=torch.float16) + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + + image_out = pipe("test", num_inference_steps=1, output_type="np").images[0] + + assert image_out.shape == (512, 512, 3) + + def test_download_local(self): + ckpt_filename = hf_hub_download( + "stable-diffusion-v1-5/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.safetensors" + ) + config_filename = hf_hub_download("stable-diffusion-v1-5/stable-diffusion-v1-5", filename="v1-inference.yaml") + + pipe = StableDiffusionPipeline.from_single_file( + ckpt_filename, config_files={"v1": config_filename}, torch_dtype=torch.float16 + ) + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + + image_out = pipe("test", num_inference_steps=1, output_type="np").images[0] + + assert image_out.shape == (512, 512, 3) + + +@nightly +@require_torch_accelerator +class StableDiffusionPipelineNightlyTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64)) + latents = torch.from_numpy(latents).to(device=device, dtype=dtype) + inputs = { + "prompt": "a photograph of an astronaut riding a horse", + "latents": latents, + "generator": generator, + "num_inference_steps": 50, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_1_4_pndm(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + 
"/stable_diffusion_text2img/stable_diffusion_1_4_pndm.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_stable_diffusion_1_5_pndm(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5").to( + torch_device + ) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_text2img/stable_diffusion_1_5_pndm.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_stable_diffusion_ddim(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to(torch_device) + sd_pipe.scheduler = DDIMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_text2img/stable_diffusion_1_4_ddim.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 3e-3 + + def test_stable_diffusion_lms(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to(torch_device) + sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_text2img/stable_diffusion_1_4_lms.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_stable_diffusion_euler(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to(torch_device) + sd_pipe.scheduler = EulerDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_text2img/stable_diffusion_1_4_euler.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + +# (sayakpaul): This test suite was run in the DGX with two GPUs (1, 2). 
+@slow +@require_torch_multi_accelerator +@require_accelerate_version_greater("0.27.0") +class StableDiffusionPipelineDeviceMapTests(unittest.TestCase): + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, generator_device="cpu", seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "a photograph of an astronaut riding a horse", + "generator": generator, + "num_inference_steps": 50, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def get_pipeline_output_without_device_map(self): + sd_pipe = StableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16 + ).to(torch_device) + sd_pipe.set_progress_bar_config(disable=True) + inputs = self.get_inputs() + no_device_map_image = sd_pipe(**inputs).images + + del sd_pipe + + return no_device_map_image + + def test_forward_pass_balanced_device_map(self): + no_device_map_image = self.get_pipeline_output_without_device_map() + + sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16 + ) + sd_pipe_with_device_map.set_progress_bar_config(disable=True) + inputs = self.get_inputs() + device_map_image = sd_pipe_with_device_map(**inputs).images + + max_diff = np.abs(device_map_image - no_device_map_image).max() + assert max_diff < 1e-3 + + def test_components_put_in_right_devices(self): + sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16 + ) + + assert len(set(sd_pipe_with_device_map.hf_device_map.values())) >= 2 + + def test_max_memory(self): + no_device_map_image = self.get_pipeline_output_without_device_map() + + sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", + device_map="balanced", + max_memory={0: "1GB", 1: "1GB"}, + torch_dtype=torch.float16, + ) + sd_pipe_with_device_map.set_progress_bar_config(disable=True) + inputs = self.get_inputs() + device_map_image = sd_pipe_with_device_map(**inputs).images + + max_diff = np.abs(device_map_image - no_device_map_image).max() + assert max_diff < 1e-3 + + def test_reset_device_map(self): + sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16 + ) + sd_pipe_with_device_map.reset_device_map() + + assert sd_pipe_with_device_map.hf_device_map is None + + for name, component in sd_pipe_with_device_map.components.items(): + if isinstance(component, torch.nn.Module): + assert component.device.type == "cpu" + + def test_reset_device_map_to(self): + sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16 + ) + sd_pipe_with_device_map.reset_device_map() + + assert sd_pipe_with_device_map.hf_device_map is None + + # Make sure `to()` can be used and the pipeline can be called. 
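+ # once the device map is reset, the pipeline should behave like a plain single-device pipeline again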
+ pipe = sd_pipe_with_device_map.to(torch_device) + _ = pipe("hello", num_inference_steps=2) + + def test_reset_device_map_enable_model_cpu_offload(self): + sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16 + ) + sd_pipe_with_device_map.reset_device_map() + + assert sd_pipe_with_device_map.hf_device_map is None + + # Make sure `enable_model_cpu_offload()` can be used and the pipeline can be called. + sd_pipe_with_device_map.enable_model_cpu_offload(device=torch_device) + _ = sd_pipe_with_device_map("hello", num_inference_steps=2) + + def test_reset_device_map_enable_sequential_cpu_offload(self): + sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16 + ) + sd_pipe_with_device_map.reset_device_map() + + assert sd_pipe_with_device_map.hf_device_map is None + + # Make sure `enable_sequential_cpu_offload()` can be used and the pipeline can be called. + sd_pipe_with_device_map.enable_sequential_cpu_offload(device=torch_device) + _ = sd_pipe_with_device_map("hello", num_inference_steps=2) diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..a0b7268b9dd40c0b20b93423af3afb2d18e5b803 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -0,0 +1,714 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
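+ +# The fast tests below run on tiny dummy components; the slow and nightly suites load full SD checkpoints.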
+ +import gc +import random +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + AutoencoderTiny, + DDIMScheduler, + DPMSolverMultistepScheduler, + HeunDiscreteScheduler, + LCMScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusionImg2ImgPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + floats_tensor, + load_image, + load_numpy, + nightly, + require_torch_accelerator, + skip_mps, + slow, + torch_device, +) +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionImg2ImgPipelineFastTests( + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionImg2ImgPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width"} + required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"} + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS + + def get_dummy_components(self, time_cond_proj_dim=None): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + time_cond_proj_dim=time_cond_proj_dim, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + ) + scheduler = PNDMScheduler(skip_prk_steps=True) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_tiny_autoencoder(self): + return AutoencoderTiny(in_channels=3, out_channels=3, latent_channels=4) + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image / 2 + 0.5 + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + 
"generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_img2img_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.4555, 0.3216, 0.4049, 0.4620, 0.4618, 0.4126, 0.4122, 0.4629, 0.4579]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_img2img_default_case_lcm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.5709, 0.4614, 0.4587, 0.5978, 0.5298, 0.6910, 0.6240, 0.5212, 0.5454]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_img2img_default_case_lcm_custom_timesteps(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["num_inference_steps"] + inputs["timesteps"] = [999, 499] + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.5709, 0.4614, 0.4587, 0.5978, 0.5298, 0.6910, 0.6240, 0.5212, 0.5454]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_img2img_negative_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + negative_prompt = "french fries" + output = sd_pipe(**inputs, negative_prompt=negative_prompt) + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.4593, 0.3408, 0.4232, 0.4749, 0.4476, 0.4115, 0.4357, 0.4733, 0.4663]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_ip_adapter(self): + expected_pipe_slice = None + if torch_device == "cpu": + expected_pipe_slice = np.array([0.4932, 0.5092, 0.5135, 0.5517, 0.5626, 0.6621, 0.6490, 0.5021, 0.5441]) + return super().test_ip_adapter(expected_pipe_slice=expected_pipe_slice) + + def test_stable_diffusion_img2img_multiple_init_images(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = 
StableDiffusionImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["prompt"] = [inputs["prompt"]] * 2 + inputs["image"] = inputs["image"].repeat(2, 1, 1, 1) + image = sd_pipe(**inputs).images + image_slice = image[-1, -3:, -3:, -1] + + assert image.shape == (2, 32, 32, 3) + expected_slice = np.array([0.4241, 0.5576, 0.5711, 0.4792, 0.4311, 0.5952, 0.5827, 0.5138, 0.5109]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_img2img_k_lms(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + components["scheduler"] = LMSDiscreteScheduler( + beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear" + ) + sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.4398, 0.4949, 0.4337, 0.6580, 0.5555, 0.4338, 0.5769, 0.5955, 0.5175]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_img2img_tiny_autoencoder(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe.vae = self.get_dummy_tiny_autoencoder() + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.00669, 0.00669, 0.0, 0.00693, 0.00858, 0.0, 0.00567, 0.00515, 0.00125]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + @skip_mps + def test_save_load_local(self): + return super().test_save_load_local() + + @skip_mps + def test_dict_tuple_outputs_equivalent(self): + return super().test_dict_tuple_outputs_equivalent() + + @skip_mps + def test_save_load_optional_components(self): + return super().test_save_load_optional_components() + + @skip_mps + def test_attention_slicing_forward_pass(self): + return super().test_attention_slicing_forward_pass(expected_max_diff=5e-3) + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + def test_float16_inference(self): + super().test_float16_inference(expected_max_diff=5e-1) + + def test_pipeline_interrupt(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + prompt = "hey" + num_inference_steps = 3 + + # store intermediate latents from the generation process + class PipelineState: + def __init__(self): + self.state = [] + + def apply(self, pipe, i, t, callback_kwargs): + self.state.append(callback_kwargs["latents"]) + return callback_kwargs + + pipe_state = PipelineState() + sd_pipe( + prompt, + image=inputs["image"], + num_inference_steps=num_inference_steps, + output_type="np", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=pipe_state.apply, + ).images + + # interrupt generation at step index + 
interrupt_step_idx = 1 + + def callback_on_step_end(pipe, i, t, callback_kwargs): + if i == interrupt_step_idx: + pipe._interrupt = True + + return callback_kwargs + + output_interrupted = sd_pipe( + prompt, + image=inputs["image"], + num_inference_steps=num_inference_steps, + output_type="latent", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=callback_on_step_end, + ).images + + # fetch intermediate latents at the interrupted step + # from the completed generation process + intermediate_latent = pipe_state.state[interrupt_step_idx] + + # compare the intermediate latent to the output of the interrupted process + # they should be the same + assert torch.allclose(intermediate_latent, output_interrupted, atol=1e-4) + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + + +@slow +@require_torch_accelerator +class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/sketch-mountains-input.png" + ) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "image": init_image, + "generator": generator, + "num_inference_steps": 3, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_img2img_default(self): + pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 768, 3) + expected_slice = np.array([0.4300, 0.4662, 0.4930, 0.3990, 0.4307, 0.4525, 0.3719, 0.4064, 0.3923]) + + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + def test_stable_diffusion_img2img_k_lms(self): + pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None) + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 768, 3) + expected_slice = np.array([0.0389, 0.0346, 0.0415, 0.0290, 0.0218, 0.0210, 0.0408, 0.0567, 0.0271]) + + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + def test_stable_diffusion_img2img_ddim(self): + pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None) + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = 
self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 768, 3) + expected_slice = np.array([0.0593, 0.0607, 0.0851, 0.0582, 0.0636, 0.0721, 0.0751, 0.0981, 0.0781]) + + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + def test_stable_diffusion_img2img_intermediate_state(self): + number_of_steps = 0 + + def callback_fn(step: int, timestep: int, latents: torch.Tensor) -> None: + callback_fn.has_been_called = True + nonlocal number_of_steps + number_of_steps += 1 + if step == 1: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 96) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([-0.4958, 0.5107, 1.1045, 2.7539, 4.6680, 3.8320, 1.5049, 1.8633, 2.6523]) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2 + elif step == 2: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 96) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([-0.4956, 0.5078, 1.0918, 2.7520, 4.6484, 3.8125, 1.5146, 1.8633, 2.6367]) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2 + + callback_fn.has_been_called = False + + pipe = StableDiffusionImg2ImgPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", safety_checker=None, torch_dtype=torch.float16 + ) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + pipe(**inputs, callback=callback_fn, callback_steps=1) + assert callback_fn.has_been_called + assert number_of_steps == 2 + + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipe = StableDiffusionImg2ImgPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", safety_checker=None, torch_dtype=torch.float16 + ) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + pipe.enable_sequential_cpu_offload(device=torch_device) + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + _ = pipe(**inputs) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 2.2 GB is allocated + assert mem_bytes < 2.2 * 10**9 + + def test_stable_diffusion_pipeline_with_model_offloading(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + + # Normal inference + + pipe = StableDiffusionImg2ImgPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + safety_checker=None, + torch_dtype=torch.float16, + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe(**inputs) + mem_bytes = backend_max_memory_allocated(torch_device) + + # With model offloading + + # Reload but don't move to cuda + pipe = StableDiffusionImg2ImgPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + safety_checker=None, + torch_dtype=torch.float16, + ) + + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipe.enable_model_cpu_offload(device=torch_device) + pipe.set_progress_bar_config(disable=None) + _ = pipe(**inputs) + mem_bytes_offloaded = backend_max_memory_allocated(torch_device) + + assert 
mem_bytes_offloaded < mem_bytes + for module in pipe.text_encoder, pipe.unet, pipe.vae: + assert module.device == torch.device("cpu") + + def test_img2img_2nd_order(self): + sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + sd_pipe.scheduler = HeunDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + inputs["num_inference_steps"] = 10 + inputs["strength"] = 0.75 + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/img2img_heun.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 5e-2 + + inputs = self.get_inputs(torch_device) + inputs["num_inference_steps"] = 11 + inputs["strength"] = 0.75 + image_other = sd_pipe(**inputs).images[0] + + mean_diff = np.abs(image - image_other).mean() + + # images should be very similar + assert mean_diff < 5e-2 + + def test_stable_diffusion_img2img_pipeline_multiple_of_8(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/img2img/sketch-mountains-input.jpg" + ) + # resize to resolution that is divisible by 8 but not 16 or 32 + init_image = init_image.resize((760, 504)) + + model_id = "CompVis/stable-diffusion-v1-4" + pipe = StableDiffusionImg2ImgPipeline.from_pretrained( + model_id, + safety_checker=None, + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + prompt = "A fantasy landscape, trending on artstation" + + generator = torch.manual_seed(0) + output = pipe( + prompt=prompt, + image=init_image, + strength=0.75, + guidance_scale=7.5, + generator=generator, + output_type="np", + ) + image = output.images[0] + + image_slice = image[255:258, 383:386, -1] + + assert image.shape == (504, 760, 3) + expected_slice = np.array([0.9393, 0.9500, 0.9399, 0.9438, 0.9458, 0.9400, 0.9455, 0.9414, 0.9423]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3 + + def test_img2img_safety_checker_works(self): + sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + inputs["num_inference_steps"] = 20 + # make sure the safety checker is activated + inputs["prompt"] = "naked, sex, porn" + out = sd_pipe(**inputs) + + assert out.nsfw_content_detected[0], f"Safety checker should work for prompt: {inputs['prompt']}" + assert np.abs(out.images[0]).sum() < 1e-5 # should be all zeros + + +@nightly +@require_torch_accelerator +class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/sketch-mountains-input.png" + ) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "image": init_image, + "generator": generator, + "num_inference_steps": 50, + "strength": 0.75, + 
"guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_img2img_pndm(self): + sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/stable_diffusion_1_5_pndm.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_img2img_ddim(self): + sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + sd_pipe.scheduler = DDIMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/stable_diffusion_1_5_ddim.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_img2img_lms(self): + sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/stable_diffusion_1_5_lms.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_img2img_dpm(self): + sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + sd_pipe.scheduler = DPMSolverMultistepScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + inputs["num_inference_steps"] = 30 + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/stable_diffusion_1_5_dpm.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..259806a9479c721a0856edaa029b7c2c496bfd90 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py @@ -0,0 +1,1098 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import gc +import random +import unittest + +import numpy as np +import torch +from huggingface_hub import hf_hub_download +from PIL import Image +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import ( + AsymmetricAutoencoderKL, + AutoencoderKL, + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + LCMScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusionInpaintPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + Expectations, + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + floats_tensor, + load_image, + load_numpy, + nightly, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_INPAINTING_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionInpaintPipelineFastTests( + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionInpaintPipeline + params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS + batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS + image_params = frozenset([]) + # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess + image_latents_params = frozenset([]) + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"mask", "masked_image_latents"}) + + def get_dummy_components(self, time_cond_proj_dim=None): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + time_cond_proj_dim=time_cond_proj_dim, + layers_per_block=2, + sample_size=32, + in_channels=9, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + ) + scheduler = PNDMScheduler(skip_prk_steps=True) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_inputs(self, device, seed=0, img_res=64, output_pil=True): + # TODO: use tensor inputs instead of PIL, this is here just to leave the old expected_slices untouched + if output_pil: + # Get random floats in [0, 1] as image + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image.cpu().permute(0, 2, 3, 1)[0] + mask_image = torch.ones_like(image) + # Convert image and mask_image to [0, 255] + 
image = 255 * image + mask_image = 255 * mask_image + # Convert to PIL image + init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((img_res, img_res)) + mask_image = Image.fromarray(np.uint8(mask_image)).convert("RGB").resize((img_res, img_res)) + else: + # Get random floats in [0, 1] as image with spatial size (img_res, img_res) + image = floats_tensor((1, 3, img_res, img_res), rng=random.Random(seed)).to(device) + # Convert image to [-1, 1] + init_image = 2.0 * image - 1.0 + mask_image = torch.ones((1, 1, img_res, img_res), device=device) + + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": init_image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_inpaint(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4703, 0.5697, 0.3879, 0.5470, 0.6042, 0.4413, 0.5078, 0.4728, 0.4469]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_inpaint_lcm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4931, 0.5988, 0.4569, 0.5556, 0.6650, 0.5087, 0.5966, 0.5358, 0.5269]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_inpaint_lcm_custom_timesteps(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["num_inference_steps"] + inputs["timesteps"] = [999, 499] + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4931, 0.5988, 0.4569, 0.5556, 0.6650, 0.5087, 0.5966, 0.5358, 0.5269]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_inpaint_image_tensor(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + out_pil = 
output.images + + inputs = self.get_dummy_inputs(device) + inputs["image"] = torch.tensor(np.array(inputs["image"]) / 127.5 - 1).permute(2, 0, 1).unsqueeze(0) + inputs["mask_image"] = torch.tensor(np.array(inputs["mask_image"]) / 255).permute(2, 0, 1)[:1].unsqueeze(0) + output = sd_pipe(**inputs) + out_tensor = output.images + + assert out_pil.shape == (1, 64, 64, 3) + assert np.abs(out_pil.flatten() - out_tensor.flatten()).max() < 5e-2 + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + def test_stable_diffusion_inpaint_strength_zero_test(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + + # check that the pipeline raises a ValueError when strength is so low that the effective number of inference steps falls below 1 + inputs["strength"] = 0.01 + with self.assertRaises(ValueError): + sd_pipe(**inputs).images + + def test_stable_diffusion_inpaint_mask_latents(self): + device = "cpu" + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components).to(device) + sd_pipe.set_progress_bar_config(disable=None) + + # normal mask + normal image + ## `image`: PIL, `mask_image`: PIL, `masked_image_latents`: None + inputs = self.get_dummy_inputs(device) + inputs["strength"] = 0.9 + out_0 = sd_pipe(**inputs).images + + # image latents + mask latents + inputs = self.get_dummy_inputs(device) + image = sd_pipe.image_processor.preprocess(inputs["image"]).to(sd_pipe.device) + mask = sd_pipe.mask_processor.preprocess(inputs["mask_image"]).to(sd_pipe.device) + masked_image = image * (mask < 0.5) + + generator = torch.Generator(device=device).manual_seed(0) + image_latents = ( + sd_pipe.vae.encode(image).latent_dist.sample(generator=generator) * sd_pipe.vae.config.scaling_factor + ) + torch.randn((1, 4, 32, 32), generator=generator) + mask_latents = ( + sd_pipe.vae.encode(masked_image).latent_dist.sample(generator=generator) + * sd_pipe.vae.config.scaling_factor + ) + inputs["image"] = image_latents + inputs["masked_image_latents"] = mask_latents + inputs["mask_image"] = mask + inputs["strength"] = 0.9 + generator = torch.Generator(device=device).manual_seed(0) + torch.randn((1, 4, 32, 32), generator=generator) + inputs["generator"] = generator + out_1 = sd_pipe(**inputs).images + assert np.abs(out_0 - out_1).max() < 1e-2 + + def test_pipeline_interrupt(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + prompt = "hey" + num_inference_steps = 3 + + # store intermediate latents from the generation process + class PipelineState: + def __init__(self): + self.state = [] + + def apply(self, pipe, i, t, callback_kwargs): + self.state.append(callback_kwargs["latents"]) + return callback_kwargs + + pipe_state = PipelineState() + sd_pipe( + prompt, + image=inputs["image"], + mask_image=inputs["mask_image"], + num_inference_steps=num_inference_steps, + output_type="np", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=pipe_state.apply, + ).images + + # interrupt generation at step index + interrupt_step_idx = 1 + + def callback_on_step_end(pipe, i, t, callback_kwargs): + if i == interrupt_step_idx: +
pipe._interrupt = True + + return callback_kwargs + + output_interrupted = sd_pipe( + prompt, + image=inputs["image"], + mask_image=inputs["mask_image"], + num_inference_steps=num_inference_steps, + output_type="latent", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=callback_on_step_end, + ).images + + # fetch intermediate latents at the interrupted step + # from the completed generation process + intermediate_latent = pipe_state.state[interrupt_step_idx] + + # compare the intermediate latent to the output of the interrupted process + # they should be the same + assert torch.allclose(intermediate_latent, output_interrupted, atol=1e-4) + + def test_ip_adapter(self, from_simple=False, expected_pipe_slice=None): + if not from_simple: + expected_pipe_slice = None + if torch_device == "cpu": + expected_pipe_slice = np.array( + [0.4390, 0.5452, 0.3772, 0.5448, 0.6031, 0.4480, 0.5194, 0.4687, 0.4640] + ) + return super().test_ip_adapter(expected_pipe_slice=expected_pipe_slice) + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict, atol=1e-3, rtol=1e-3) + + +class StableDiffusionSimpleInpaintPipelineFastTests(StableDiffusionInpaintPipelineFastTests): + pipeline_class = StableDiffusionInpaintPipeline + params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS + batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS + image_params = frozenset([]) + # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess + + def get_dummy_components(self, time_cond_proj_dim=None): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + time_cond_proj_dim=time_cond_proj_dim, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + ) + scheduler = PNDMScheduler(skip_prk_steps=True) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_inputs_2images(self, device, seed=0, img_res=64): + # Get random floats in [0, 1] as image with spatial size (img_res, img_res) + image1 = floats_tensor((1, 3, img_res, img_res), rng=random.Random(seed)).to(device) + image2 = floats_tensor((1, 3, img_res, img_res), rng=random.Random(seed + 22)).to(device) + # Convert images to [-1, 1] + init_image1 = 2.0 * image1 - 1.0 + init_image2 = 2.0 * image2 - 1.0 + + # empty mask + mask_image = 
torch.zeros((1, 1, img_res, img_res), device=device) + + if str(device).startswith("mps"): + generator1 = torch.manual_seed(seed) + generator2 = torch.manual_seed(seed) + else: + generator1 = torch.Generator(device=device).manual_seed(seed) + generator2 = torch.Generator(device=device).manual_seed(seed) + + inputs = { + "prompt": ["A painting of a squirrel eating a burger"] * 2, + "image": [init_image1, init_image2], + "mask_image": [mask_image] * 2, + "generator": [generator1, generator2], + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + return inputs + + def test_ip_adapter(self): + expected_pipe_slice = None + if torch_device == "cpu": + expected_pipe_slice = np.array([0.6345, 0.5395, 0.5611, 0.5403, 0.5830, 0.5855, 0.5193, 0.5443, 0.5211]) + return super().test_ip_adapter(from_simple=True, expected_pipe_slice=expected_pipe_slice) + + def test_stable_diffusion_inpaint(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.6584, 0.5424, 0.5649, 0.5449, 0.5897, 0.6111, 0.5404, 0.5463, 0.5214]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_inpaint_lcm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.6240, 0.5355, 0.5649, 0.5378, 0.5374, 0.6242, 0.5132, 0.5347, 0.5396]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_inpaint_lcm_custom_timesteps(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["num_inference_steps"] + inputs["timesteps"] = [999, 499] + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.6240, 0.5355, 0.5649, 0.5378, 0.5374, 0.6242, 0.5132, 0.5347, 0.5396]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_inpaint_2_images(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + # test to confirm that passing the same image twice yields the same output + inputs = self.get_dummy_inputs(device) + gen1 = torch.Generator(device=device).manual_seed(0) + gen2 =
torch.Generator(device=device).manual_seed(0) + for name in ["prompt", "image", "mask_image"]: + inputs[name] = [inputs[name]] * 2 + inputs["generator"] = [gen1, gen2] + images = sd_pipe(**inputs).images + + assert images.shape == (2, 64, 64, 3) + + image_slice1 = images[0, -3:, -3:, -1] + image_slice2 = images[1, -3:, -3:, -1] + assert np.abs(image_slice1.flatten() - image_slice2.flatten()).max() < 1e-4 + + # test to confirm that if we pass two different images, we will get different output + inputs = self.get_dummy_inputs_2images(device) + images = sd_pipe(**inputs).images + assert images.shape == (2, 64, 64, 3) + + image_slice1 = images[0, -3:, -3:, -1] + image_slice2 = images[1, -3:, -3:, -1] + assert np.abs(image_slice1.flatten() - image_slice2.flatten()).max() > 1e-2 + + def test_stable_diffusion_inpaint_euler(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device, output_pil=False) + half_dim = inputs["image"].shape[2] // 2 + inputs["mask_image"][0, 0, :half_dim, :half_dim] = 0 + + inputs["num_inference_steps"] = 4 + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + + expected_slice = np.array( + [[0.6387283, 0.5564158, 0.58631873, 0.5539942, 0.5494673, 0.6461868, 0.5251618, 0.5497595, 0.5508756]] + ) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-4 + + +@slow +@require_torch_accelerator +class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase): + def setUp(self): + super().setUp() + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_image.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_mask.png" + ) + inputs = { + "prompt": "Face of a yellow cat, high resolution, sitting on a park bench", + "image": init_image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_inpaint_ddim(self): + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.0427, 0.0460, 0.0483, 0.0460, 0.0584, 0.0521, 0.1549, 0.1695, 0.1794]) + + assert np.abs(expected_slice - image_slice).max() < 6e-4 + + def test_stable_diffusion_inpaint_fp16(self): + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", torch_dtype=torch.float16, safety_checker=None + ) + pipe.unet.set_default_attn_processor() 
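# (set_default_attn_processor() pins the UNet to the vanilla attention
# implementation, so the fp16 reference slice below is not perturbed by
# whichever optimized attention backend happens to be installed on the
# test machine)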
+ pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + image = pipe(**inputs).images + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.1509, 0.1245, 0.1672, 0.1655, 0.1519, 0.1226, 0.1462, 0.1567, 0.2451]) + assert np.abs(expected_slice - image_slice).max() < 1e-1 + + def test_stable_diffusion_inpaint_pndm(self): + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.0425, 0.0273, 0.0344, 0.1694, 0.1727, 0.1812, 0.3256, 0.3311, 0.3272]) + + assert np.abs(expected_slice - image_slice).max() < 5e-3 + + def test_stable_diffusion_inpaint_k_lms(self): + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.9314, 0.7575, 0.9432, 0.8885, 0.9028, 0.7298, 0.9811, 0.9667, 0.7633]) + + assert np.abs(expected_slice - image_slice).max() < 6e-3 + + def test_stable_diffusion_inpaint_with_sequential_cpu_offloading(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None, torch_dtype=torch.float16 + ) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + pipe.enable_sequential_cpu_offload(device=torch_device) + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + _ = pipe(**inputs) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 2.2 GB is allocated + assert mem_bytes < 2.2 * 10**9 + + def test_stable_diffusion_inpaint_pil_input_resolution_test(self): + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + # change input image to a random size (one that would cause a tensor mismatch error) + inputs["image"] = inputs["image"].resize((127, 127)) + inputs["mask_image"] = inputs["mask_image"].resize((127, 127)) + inputs["height"] = 128 + inputs["width"] = 128 + image = pipe(**inputs).images + # verify that the returned image has the same height and width as the input height and width + assert image.shape == (1, inputs["height"], inputs["width"], 3) + + def test_stable_diffusion_inpaint_strength_test(self): + pipe = StableDiffusionInpaintPipeline.from_pretrained( + 
"botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.unet.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + # change input strength + inputs["strength"] = 0.75 + image = pipe(**inputs).images + # verify that the returned image has the same height and width as the input height and width + assert image.shape == (1, 512, 512, 3) + + image_slice = image[0, 253:256, 253:256, -1].flatten() + expected_slice = np.array([0.2728, 0.2803, 0.2665, 0.2511, 0.2774, 0.2586, 0.2391, 0.2392, 0.2582]) + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + def test_stable_diffusion_simple_inpaint_ddim(self): + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None + ) + pipe.unet.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.3757, 0.3875, 0.4445, 0.4353, 0.3780, 0.4513, 0.3965, 0.3984, 0.4362]) + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + +@slow +@require_torch_accelerator +class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.TestCase): + def setUp(self): + super().setUp() + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_image.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_mask.png" + ) + inputs = { + "prompt": "Face of a yellow cat, high resolution, sitting on a park bench", + "image": init_image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_inpaint_ddim(self): + vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5") + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.vae = vae + pipe.unet.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.0522, 0.0604, 0.0596, 0.0449, 0.0493, 0.0427, 0.1186, 0.1289, 0.1442]) + + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + def test_stable_diffusion_inpaint_fp16(self): + vae = AsymmetricAutoencoderKL.from_pretrained( + "cross-attention/asymmetric-autoencoder-kl-x-1-5", torch_dtype=torch.float16 + ) + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", torch_dtype=torch.float16, safety_checker=None + ) + 
pipe.unet.set_default_attn_processor() + pipe.vae = vae + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + image = pipe(**inputs).images + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slices = Expectations( + { + ("xpu", 3): np.array( + [ + 0.2063, + 0.1731, + 0.1553, + 0.1741, + 0.1772, + 0.1077, + 0.2109, + 0.2407, + 0.1243, + ] + ), + ("cuda", 7): np.array( + [ + 0.1343, + 0.1406, + 0.1440, + 0.1504, + 0.1729, + 0.0989, + 0.1807, + 0.2822, + 0.1179, + ] + ), + } + ) + expected_slice = expected_slices.get_expectation() + + assert np.abs(expected_slice - image_slice).max() < 5e-2 + + def test_stable_diffusion_inpaint_pndm(self): + vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5") + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.unet.set_default_attn_processor() + pipe.vae = vae + pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.0966, 0.1083, 0.1148, 0.1422, 0.1318, 0.1197, 0.3702, 0.3537, 0.3288]) + + assert np.abs(expected_slice - image_slice).max() < 5e-3 + + def test_stable_diffusion_inpaint_k_lms(self): + vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5") + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.unet.set_default_attn_processor() + pipe.vae = vae + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, 253:256, 253:256, -1].flatten() + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.8931, 0.8683, 0.8965, 0.8501, 0.8592, 0.9118, 0.8734, 0.7463, 0.8990]) + assert np.abs(expected_slice - image_slice).max() < 6e-3 + + def test_stable_diffusion_inpaint_with_sequential_cpu_offloading(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + vae = AsymmetricAutoencoderKL.from_pretrained( + "cross-attention/asymmetric-autoencoder-kl-x-1-5", torch_dtype=torch.float16 + ) + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None, torch_dtype=torch.float16 + ) + pipe.vae = vae + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + pipe.enable_sequential_cpu_offload(device=torch_device) + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + _ = pipe(**inputs) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 2.45 GB is allocated + assert mem_bytes < 2.45 * 10**9 + + def test_stable_diffusion_inpaint_pil_input_resolution_test(self): + vae = AsymmetricAutoencoderKL.from_pretrained( + "cross-attention/asymmetric-autoencoder-kl-x-1-5", + ) + pipe = 
StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.vae = vae + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + # change input image to a random size (one that would cause a tensor mismatch error) + inputs["image"] = inputs["image"].resize((127, 127)) + inputs["mask_image"] = inputs["mask_image"].resize((127, 127)) + inputs["height"] = 128 + inputs["width"] = 128 + image = pipe(**inputs).images + # verify that the returned image has the same height and width as the input height and width + assert image.shape == (1, inputs["height"], inputs["width"], 3) + + def test_stable_diffusion_inpaint_strength_test(self): + vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5") + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "botp/stable-diffusion-v1-5-inpainting", safety_checker=None + ) + pipe.unet.set_default_attn_processor() + pipe.vae = vae + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + # change input strength + inputs["strength"] = 0.75 + image = pipe(**inputs).images + # verify that the returned image has the same height and width as the input height and width + assert image.shape == (1, 512, 512, 3) + + image_slice = image[0, 253:256, 253:256, -1].flatten() + expected_slice = np.array([0.2458, 0.2576, 0.3124, 0.2679, 0.2669, 0.2796, 0.2872, 0.2975, 0.2661]) + assert np.abs(expected_slice - image_slice).max() < 3e-3 + + def test_stable_diffusion_simple_inpaint_ddim(self): + vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5") + pipe = StableDiffusionInpaintPipeline.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None + ) + pipe.vae = vae + pipe.unet.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.3296, 0.4041, 0.4097, 0.4145, 0.4342, 0.4152, 0.4927, 0.4931, 0.4430]) + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + def test_download_local(self): + vae = AsymmetricAutoencoderKL.from_pretrained( + "cross-attention/asymmetric-autoencoder-kl-x-1-5", torch_dtype=torch.float16 + ) + filename = hf_hub_download("botp/stable-diffusion-v1-5-inpainting", filename="sd-v1-5-inpainting.ckpt") + + pipe = StableDiffusionInpaintPipeline.from_single_file(filename, torch_dtype=torch.float16) + pipe.vae = vae + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + + inputs = self.get_inputs(torch_device) + inputs["num_inference_steps"] = 1 + image_out = pipe(**inputs).images[0] + + assert image_out.shape == (512, 512, 3) + + +@nightly +@require_torch_accelerator +class StableDiffusionInpaintPipelineNightlyTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, 
device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_image.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_mask.png" + ) + inputs = { + "prompt": "Face of a yellow cat, high resolution, sitting on a park bench", + "image": init_image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 50, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_inpaint_ddim(self): + sd_pipe = StableDiffusionInpaintPipeline.from_pretrained("botp/stable-diffusion-v1-5-inpainting") + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/stable_diffusion_inpaint_ddim.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_inpaint_pndm(self): + sd_pipe = StableDiffusionInpaintPipeline.from_pretrained("botp/stable-diffusion-v1-5-inpainting") + sd_pipe.scheduler = PNDMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/stable_diffusion_inpaint_pndm.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_inpaint_lms(self): + sd_pipe = StableDiffusionInpaintPipeline.from_pretrained("botp/stable-diffusion-v1-5-inpainting") + sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/stable_diffusion_inpaint_lms.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_inpaint_dpm(self): + sd_pipe = StableDiffusionInpaintPipeline.from_pretrained("botp/stable-diffusion-v1-5-inpainting") + sd_pipe.scheduler = DPMSolverMultistepScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + inputs["num_inference_steps"] = 30 + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/stable_diffusion_inpaint_dpm_multi.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py new file mode 100644 index 0000000000000000000000000000000000000000..4758c5dab44b4618bd96e52040918d6b66bebe38 --- /dev/null +++ 
b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py @@ -0,0 +1,431 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import random +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + EulerAncestralDiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusionInstructPix2PixPipeline, + UNet2DConditionModel, +) +from diffusers.image_processor import VaeImageProcessor + +from ...testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + floats_tensor, + load_image, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import ( + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionInstructPix2PixPipelineFastTests( + PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableDiffusionInstructPix2PixPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width", "cross_attention_kwargs"} + batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"image_latents"}) - {"negative_prompt_embeds"} + + def get_dummy_components(self): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=8, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + ) + scheduler = PNDMScheduler(skip_prk_steps=True) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": 
tokenizer, + "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image.cpu().permute(0, 2, 3, 1)[0] + image = Image.fromarray(np.uint8(image)).convert("RGB") + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "image_guidance_scale": 1, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_pix2pix_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionInstructPix2PixPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.7526, 0.3750, 0.4547, 0.6117, 0.5866, 0.5016, 0.4327, 0.5642, 0.4815]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_pix2pix_negative_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionInstructPix2PixPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + negative_prompt = "french fries" + output = sd_pipe(**inputs, negative_prompt=negative_prompt) + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.7511, 0.3642, 0.4553, 0.6236, 0.5797, 0.5013, 0.4343, 0.5611, 0.4831]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_pix2pix_multiple_init_images(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionInstructPix2PixPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["prompt"] = [inputs["prompt"]] * 2 + + image = np.array(inputs["image"]).astype(np.float32) / 255.0 + image = torch.from_numpy(image).unsqueeze(0).to(device) + image = image / 2 + 0.5 + image = image.permute(0, 3, 1, 2) + inputs["image"] = image.repeat(2, 1, 1, 1) + + image = sd_pipe(**inputs).images + image_slice = image[-1, -3:, -3:, -1] + + assert image.shape == (2, 32, 32, 3) + expected_slice = np.array([0.5812, 0.5748, 0.5222, 0.5908, 0.5695, 0.7174, 0.6804, 0.5523, 0.5579]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_pix2pix_euler(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + components["scheduler"] = EulerAncestralDiscreteScheduler( + beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear" + ) + sd_pipe = StableDiffusionInstructPix2PixPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) 
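# (the pix2pix fast tests follow the standard slice-regression pattern: run
# two inference steps on tiny dummy components, take the bottom-right 3x3
# patch of the last channel via image[0, -3:, -3:, -1], and compare it to
# hard-coded reference values within a small absolute tolerance)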
+ image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.7417, 0.3842, 0.4732, 0.5776, 0.5891, 0.5139, 0.4052, 0.5673, 0.4986]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + # Overwrite the default test_latents_inputs because pix2pix encode the image differently + def test_latents_input(self): + components = self.get_dummy_components() + pipe = StableDiffusionInstructPix2PixPipeline(**components) + pipe.image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + out = pipe(**self.get_dummy_inputs_by_type(torch_device, input_image_type="pt"))[0] + + vae = components["vae"] + inputs = self.get_dummy_inputs_by_type(torch_device, input_image_type="pt") + + for image_param in self.image_latents_params: + if image_param in inputs.keys(): + inputs[image_param] = vae.encode(inputs[image_param]).latent_dist.mode() + + out_latents_inputs = pipe(**inputs)[0] + + max_diff = np.abs(out - out_latents_inputs).max() + self.assertLess(max_diff, 1e-4, "passing latents as image input generate different result from passing image") + + # Override the default test_callback_cfg because pix2pix create inputs for cfg differently + def test_callback_cfg(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + def callback_no_cfg(pipe, i, t, callback_kwargs): + if i == 1: + for k, w in callback_kwargs.items(): + if k in self.callback_cfg_params: + callback_kwargs[k] = callback_kwargs[k].chunk(3)[0] + pipe._guidance_scale = 1.0 + + return callback_kwargs + + inputs = self.get_dummy_inputs(torch_device) + inputs["guidance_scale"] = 1.0 + inputs["num_inference_steps"] = 2 + out_no_cfg = pipe(**inputs)[0] + + inputs["guidance_scale"] = 7.5 + inputs["callback_on_step_end"] = callback_no_cfg + inputs["callback_on_step_end_tensor_inputs"] = pipe._callback_tensor_inputs + out_callback_no_cfg = pipe(**inputs)[0] + + assert out_no_cfg.shape == out_callback_no_cfg.shape + + +@slow +@require_torch_accelerator +class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, seed=0): + generator = torch.manual_seed(seed) + image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main/stable_diffusion_pix2pix/example.jpg" + ) + inputs = { + "prompt": "turn him into a cyborg", + "image": image, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "image_guidance_scale": 1.0, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_pix2pix_default(self): + pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained( + "timbrooks/instruct-pix2pix", safety_checker=None + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.5902, 0.6015, 0.6027, 0.5983, 0.6092, 0.6061, 
0.5765, 0.5785, 0.5555]) + + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + def test_stable_diffusion_pix2pix_k_lms(self): + pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained( + "timbrooks/instruct-pix2pix", safety_checker=None + ) + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.6578, 0.6817, 0.6972, 0.6761, 0.6856, 0.6916, 0.6428, 0.6516, 0.6301]) + + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + def test_stable_diffusion_pix2pix_ddim(self): + pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained( + "timbrooks/instruct-pix2pix", safety_checker=None + ) + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs() + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.3828, 0.3834, 0.3818, 0.3792, 0.3865, 0.3752, 0.3792, 0.3847, 0.3753]) + + assert np.abs(expected_slice - image_slice).max() < 1e-3 + + def test_stable_diffusion_pix2pix_intermediate_state(self): + number_of_steps = 0 + + def callback_fn(step: int, timestep: int, latents: torch.Tensor) -> None: + callback_fn.has_been_called = True + nonlocal number_of_steps + number_of_steps += 1 + if step == 1: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([-0.2463, -0.4644, -0.9756, 1.5176, 1.4414, 0.7866, 0.9897, 0.8521, 0.7983]) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2 + elif step == 2: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([-0.2644, -0.4626, -0.9653, 1.5176, 1.4551, 0.7686, 0.9805, 0.8452, 0.8115]) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2 + + callback_fn.has_been_called = False + + pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained( + "timbrooks/instruct-pix2pix", safety_checker=None, torch_dtype=torch.float16 + ) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs() + pipe(**inputs, callback=callback_fn, callback_steps=1) + assert callback_fn.has_been_called + assert number_of_steps == 3 + + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained( + "timbrooks/instruct-pix2pix", safety_checker=None, torch_dtype=torch.float16 + ) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + pipe.enable_sequential_cpu_offload(device=torch_device) + + inputs = self.get_inputs() + _ = pipe(**inputs) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 2.2 GB is allocated + assert mem_bytes < 2.2 * 10**9 + + def test_stable_diffusion_pix2pix_pipeline_multiple_of_8(self): + inputs = self.get_inputs() 
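+ # with the VAE's 8x downsampling, a 504 px input yields 63 px latents, so the UNet's down/up blocks must handle odd spatial sizes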
+ # resize to resolution that is divisible by 8 but not 16 or 32 + inputs["image"] = inputs["image"].resize((504, 504)) + + model_id = "timbrooks/instruct-pix2pix" + pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained( + model_id, + safety_checker=None, + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + output = pipe(**inputs) + image = output.images[0] + + image_slice = image[255:258, 383:386, -1] + + assert image.shape == (504, 504, 3) + expected_slice = np.array([0.2726, 0.2529, 0.2664, 0.2655, 0.2641, 0.2642, 0.2591, 0.2649, 0.2590]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..3b2552b432d3eaff31c637312b26ab10a38155dd --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py @@ -0,0 +1,437 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import gc +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusionPipeline, + UNet2DConditionModel, + logging, +) + +from ...testing_utils import ( + CaptureLogger, + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + load_numpy, + nightly, + numpy_cosine_similarity_distance, + require_torch_accelerator, + skip_mps, + slow, + torch_device, +) +from ..pipeline_params import ( + TEXT_TO_IMAGE_BATCH_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, + TEXT_TO_IMAGE_IMAGE_PARAMS, + TEXT_TO_IMAGE_PARAMS, +) +from ..test_pipelines_common import ( + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, + SDFunctionTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusion2PipelineFastTests( + SDFunctionTesterMixin, + PipelineLatentTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionPipeline + params = TEXT_TO_IMAGE_PARAMS + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + ) + scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=512, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + generator_device = "cpu" if not device.startswith("cuda") else "cuda" + if not str(device).startswith("mps"): + generator = torch.Generator(device=generator_device).manual_seed(seed) + else: + generator = torch.manual_seed(seed) + + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + 
return inputs + + def test_stable_diffusion_ddim(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5753, 0.6113, 0.5005, 0.5036, 0.5464, 0.4725, 0.4982, 0.4865, 0.4861]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_pndm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + components["scheduler"] = PNDMScheduler(skip_prk_steps=True) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5121, 0.5714, 0.4827, 0.5057, 0.5646, 0.4766, 0.5189, 0.4895, 0.4990]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_k_lms(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + components["scheduler"] = LMSDiscreteScheduler.from_config(components["scheduler"].config) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4865, 0.5439, 0.4840, 0.4995, 0.5543, 0.4846, 0.5199, 0.4942, 0.5061]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_k_euler_ancestral(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + components["scheduler"] = EulerAncestralDiscreteScheduler.from_config(components["scheduler"].config) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4864, 0.5440, 0.4842, 0.4994, 0.5543, 0.4846, 0.5196, 0.4942, 0.5063]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_k_euler(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + components["scheduler"] = EulerDiscreteScheduler.from_config(components["scheduler"].config) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4865, 0.5439, 0.4840, 0.4995, 0.5543, 0.4846, 0.5199, 0.4942, 0.5061]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_unflawed(self): + 
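+ # "unflawed" refers to the fixes proposed in "Common Diffusion Noise Schedules and Sample Steps Are Flawed": trailing timestep spacing plus guidance rescale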
device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + components["scheduler"] = DDIMScheduler.from_config( + components["scheduler"].config, timestep_spacing="trailing" + ) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["guidance_rescale"] = 0.7 + inputs["num_inference_steps"] = 10 + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4736, 0.5405, 0.4705, 0.4955, 0.5675, 0.4812, 0.5310, 0.4967, 0.5064]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_long_prompt(self): + components = self.get_dummy_components() + components["scheduler"] = LMSDiscreteScheduler.from_config(components["scheduler"].config) + sd_pipe = StableDiffusionPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + do_classifier_free_guidance = True + negative_prompt = None + num_images_per_prompt = 1 + logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion") + logger.setLevel(logging.WARNING) + + prompt = 25 * "@" + with CaptureLogger(logger) as cap_logger_3: + text_embeddings_3, negeative_text_embeddings_3 = sd_pipe.encode_prompt( + prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + if negeative_text_embeddings_3 is not None: + text_embeddings_3 = torch.cat([negeative_text_embeddings_3, text_embeddings_3]) + + prompt = 100 * "@" + with CaptureLogger(logger) as cap_logger: + text_embeddings, negative_embeddings = sd_pipe.encode_prompt( + prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + if negative_embeddings is not None: + text_embeddings = torch.cat([negative_embeddings, text_embeddings]) + + negative_prompt = "Hello" + with CaptureLogger(logger) as cap_logger_2: + text_embeddings_2, negative_text_embeddings_2 = sd_pipe.encode_prompt( + prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + if negative_text_embeddings_2 is not None: + text_embeddings_2 = torch.cat([negative_text_embeddings_2, text_embeddings_2]) + + assert text_embeddings_3.shape == text_embeddings_2.shape == text_embeddings.shape + assert text_embeddings.shape[1] == 77 + + assert cap_logger.out == cap_logger_2.out + # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25 + assert cap_logger.out.count("@") == 25 + assert cap_logger_3.out == "" + + def test_attention_slicing_forward_pass(self): + super().test_attention_slicing_forward_pass(expected_max_diff=3e-3) + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + + +@slow +@require_torch_accelerator +@skip_mps +class StableDiffusion2PipelineSlowTests(unittest.TestCase): + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, 
seed=0): + if not str(device).startswith("mps"): + generator = torch.Generator(device=generator_device).manual_seed(seed) + else: + generator = torch.manual_seed(seed) + + latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64)) + latents = torch.from_numpy(latents).to(device=device, dtype=dtype) + inputs = { + "prompt": "a photograph of an astronaut riding a horse", + "latents": latents, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_default_ddim(self): + pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-base") + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.49493, 0.47896, 0.40798, 0.54214, 0.53212, 0.48202, 0.47656, 0.46329, 0.48506]) + assert np.abs(image_slice - expected_slice).max() < 7e-3 + + @require_torch_accelerator + def test_stable_diffusion_attention_slicing(self): + backend_reset_peak_memory_stats(torch_device) + pipe = StableDiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-2-base", torch_dtype=torch.float16 + ) + pipe.unet.set_default_attn_processor() + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + # enable attention slicing + pipe.enable_attention_slicing() + inputs = self.get_inputs(torch_device, dtype=torch.float16) + image_sliced = pipe(**inputs).images + + mem_bytes = backend_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + # make sure that less than 3.3 GB is allocated + assert mem_bytes < 3.3 * 10**9 + + # disable slicing + pipe.disable_attention_slicing() + pipe.unet.set_default_attn_processor() + inputs = self.get_inputs(torch_device, dtype=torch.float16) + image = pipe(**inputs).images + + # make sure that more than 3.3 GB is allocated + mem_bytes = backend_max_memory_allocated(torch_device) + assert mem_bytes > 3.3 * 10**9 + max_diff = numpy_cosine_similarity_distance(image.flatten(), image_sliced.flatten()) + assert max_diff < 5e-3 + + +@nightly +@require_torch_accelerator +@skip_mps +class StableDiffusion2PipelineNightlyTests(unittest.TestCase): + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + _generator_device = "cpu" if not generator_device.startswith("cuda") else "cuda" + if not str(device).startswith("mps"): + generator = torch.Generator(device=_generator_device).manual_seed(seed) + else: + generator = torch.manual_seed(seed) + + latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64)) + latents = torch.from_numpy(latents).to(device=device, dtype=dtype) + inputs = { + "prompt": "a photograph of an astronaut riding a horse", + "latents": latents, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_2_1_default(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base").to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + 
"/stable_diffusion_2_text2img/stable_diffusion_2_0_pndm.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py new file mode 100644 index 0000000000000000000000000000000000000000..bea7c099046f6a6098fa07e87d9f90ec303f68d5 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py @@ -0,0 +1,470 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import random +import tempfile +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import ( + CLIPTextConfig, + CLIPTextModel, + CLIPTokenizer, + DPTConfig, + DPTForDepthEstimation, + DPTImageProcessor, +) + +from diffusers import ( + AutoencoderKL, + PNDMScheduler, + StableDiffusionDepth2ImgPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + floats_tensor, + load_image, + load_numpy, + nightly, + require_accelerate_version_greater, + require_accelerator, + require_torch_accelerator, + skip_mps, + slow, + torch_device, +) +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, + TEXT_TO_IMAGE_IMAGE_PARAMS, +) +from ..test_pipelines_common import PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin + + +enable_full_determinism() + + +@skip_mps +class StableDiffusionDepth2ImgPipelineFastTests( + PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableDiffusionDepth2ImgPipeline + test_save_load_optional_components = False + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width"} + required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"} + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"depth_mask"}) + + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=5, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + attention_head_dim=(2, 4), + use_linear_projection=True, + ) + scheduler = PNDMScheduler(skip_prk_steps=True) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + 
up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + backbone_config = { + "global_padding": "same", + "layer_type": "bottleneck", + "depths": [3, 4, 9], + "out_features": ["stage1", "stage2", "stage3"], + "embedding_dynamic_padding": True, + "hidden_sizes": [96, 192, 384, 768], + "num_groups": 2, + } + depth_estimator_config = DPTConfig( + image_size=32, + patch_size=16, + num_channels=3, + hidden_size=32, + num_hidden_layers=4, + backbone_out_indices=(0, 1, 2, 3), + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + is_decoder=False, + initializer_range=0.02, + is_hybrid=True, + backbone_config=backbone_config, + backbone_featmap_shape=[1, 384, 24, 24], + ) + depth_estimator = DPTForDepthEstimation(depth_estimator_config).eval() + feature_extractor = DPTImageProcessor.from_pretrained("hf-internal-testing/tiny-random-DPTForDepthEstimation") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "depth_estimator": depth_estimator, + "feature_extractor": feature_extractor, + } + return components + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)) + image = image.cpu().permute(0, 2, 3, 1)[0] + image = Image.fromarray(np.uint8(image)).convert("RGB").resize((32, 32)) + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + return inputs + + def test_save_load_local(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(output - output_loaded).max() + self.assertLess(max_diff, 1e-4) + + @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") + @require_accelerator + def test_save_load_float16(self): + components = self.get_dummy_components() + for name, module in components.items(): + if hasattr(module, "half"): + components[name] = module.to(torch_device).half() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, torch_dtype=torch.float16) + 
pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for name, component in pipe_loaded.components.items(): + if hasattr(component, "dtype"): + self.assertTrue( + component.dtype == torch.float16, + f"`{name}.dtype` switched from `float16` to {component.dtype} after loading.", + ) + + inputs = self.get_dummy_inputs(torch_device) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(output - output_loaded).max() + self.assertLess(max_diff, 2e-2, "The output of the fp16 pipeline changed after saving and loading.") + + @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") + @require_accelerator + def test_float16_inference(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + for name, module in components.items(): + if hasattr(module, "half"): + components[name] = module.half() + pipe_fp16 = self.pipeline_class(**components) + pipe_fp16.to(torch_device) + pipe_fp16.set_progress_bar_config(disable=None) + + output = pipe(**self.get_dummy_inputs(torch_device))[0] + output_fp16 = pipe_fp16(**self.get_dummy_inputs(torch_device))[0] + + max_diff = np.abs(output - output_fp16).max() + self.assertLess(max_diff, 1.3e-2, "The outputs of the fp16 and fp32 pipelines are too different.") + + @require_accelerator + @require_accelerate_version_greater("0.14.0") + def test_cpu_offload_forward_pass(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output_without_offload = pipe(**inputs)[0] + + pipe.enable_sequential_cpu_offload(device=torch_device) + inputs = self.get_dummy_inputs(torch_device) + output_with_offload = pipe(**inputs)[0] + + max_diff = np.abs(output_with_offload - output_without_offload).max() + self.assertLess(max_diff, 1e-4, "CPU offloading should not affect the inference results") + + def test_dict_tuple_outputs_equivalent(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + output = pipe(**self.get_dummy_inputs(torch_device))[0] + output_tuple = pipe(**self.get_dummy_inputs(torch_device), return_dict=False)[0] + + max_diff = np.abs(output - output_tuple).max() + self.assertLess(max_diff, 1e-4) + + def test_stable_diffusion_depth2img_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = StableDiffusionDepth2ImgPipeline(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + if torch_device == "mps": + expected_slice = np.array([0.6071, 0.5035, 0.4378, 0.5776, 0.5753, 0.4316, 0.4513, 0.5263, 0.4546]) + else: + expected_slice = np.array([0.5435, 0.4992, 0.3783, 0.4411, 0.5842, 0.4654, 0.3786, 0.5077, 0.4655]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_depth2img_negative_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = StableDiffusionDepth2ImgPipeline(**components) + pipe = pipe.to(device) + 
pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + negative_prompt = "french fries" + output = pipe(**inputs, negative_prompt=negative_prompt) + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + if torch_device == "mps": + expected_slice = np.array([0.6296, 0.5125, 0.3890, 0.4456, 0.5955, 0.4621, 0.3810, 0.5310, 0.4626]) + else: + expected_slice = np.array([0.6012, 0.4507, 0.3769, 0.4121, 0.5566, 0.4585, 0.3803, 0.5045, 0.4631]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_depth2img_multiple_init_images(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = StableDiffusionDepth2ImgPipeline(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["prompt"] = [inputs["prompt"]] * 2 + inputs["image"] = 2 * [inputs["image"]] + image = pipe(**inputs).images + image_slice = image[-1, -3:, -3:, -1] + + assert image.shape == (2, 32, 32, 3) + + if torch_device == "mps": + expected_slice = np.array([0.6501, 0.5150, 0.4939, 0.6688, 0.5437, 0.5758, 0.5115, 0.4406, 0.4551]) + else: + expected_slice = np.array([0.6557, 0.6214, 0.6254, 0.5775, 0.4785, 0.5949, 0.5904, 0.4785, 0.4730]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_depth2img_pil(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = StableDiffusionDepth2ImgPipeline(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + if torch_device == "mps": + expected_slice = np.array([0.53232, 0.47015, 0.40868, 0.45651, 0.4891, 0.4668, 0.4287, 0.48822, 0.47439]) + else: + expected_slice = np.array([0.5435, 0.4992, 0.3783, 0.4411, 0.5842, 0.4654, 0.3786, 0.5077, 0.4655]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + @skip_mps + def test_attention_slicing_forward_pass(self): + return super().test_attention_slicing_forward_pass() + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=7e-3) + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + + +@slow +@require_torch_accelerator +class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/depth2img/two_cats.png" + ) + inputs = { + "prompt": "two tigers", + "image": init_image, + "generator": generator, + "num_inference_steps": 3, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + 
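+ # note: with strength=0.75 in get_inputs above, img2img-style pipelines run roughly int(num_inference_steps * strength) denoising steps on the noised init-image latents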
def test_stable_diffusion_depth2img_pipeline_default(self): + pipe = StableDiffusionDepth2ImgPipeline.from_pretrained( + "stabilityai/stable-diffusion-2-depth", safety_checker=None + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + inputs = self.get_inputs() + image = pipe(**inputs).images + image_slice = image[0, 253:256, 253:256, -1].flatten() + + assert image.shape == (1, 480, 640, 3) + expected_slice = np.array([0.5435, 0.4992, 0.3783, 0.4411, 0.5842, 0.4654, 0.3786, 0.5077, 0.4655]) + + assert np.abs(expected_slice - image_slice).max() < 6e-1 + + +@nightly +@require_torch_accelerator +class StableDiffusionDepth2ImgPipelineNightlyTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/depth2img/two_cats.png" + ) + inputs = { + "prompt": "two tigers", + "image": init_image, + "generator": generator, + "num_inference_steps": 2, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_depth2img(self): + pipe = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-2-depth") + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs() + image = pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_depth2img/stable_diffusion_2_0_pndm.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..f010c1b03fe338803b63026d63bfb5466840264c --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py @@ -0,0 +1,293 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import gc +import random +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import AutoencoderKL, PNDMScheduler, StableDiffusionInpaintPipeline, UNet2DConditionModel + +from ...testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + floats_tensor, + load_image, + load_numpy, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_INPAINTING_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin + + +enable_full_determinism() + + +class StableDiffusion2InpaintPipelineFastTests( + PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableDiffusionInpaintPipeline + params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS + batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS + image_params = frozenset( + [] + ) # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess + image_latents_params = frozenset([]) + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"mask", "masked_image_latents"}) + + def get_dummy_components(self): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=9, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + ) + scheduler = PNDMScheduler(skip_prk_steps=True) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=512, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + # TODO: use tensor inputs instead of PIL, this is here just to leave the old expected_slices untouched + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image.cpu().permute(0, 2, 3, 1)[0] + init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + mask_image = Image.fromarray(np.uint8(image + 4)).convert("RGB").resize((64, 64)) + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel 
eating a burger", + "image": init_image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_inpaint(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionInpaintPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4727, 0.5735, 0.3941, 0.5446, 0.5926, 0.4394, 0.5062, 0.4654, 0.4476]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + + +@slow +@require_torch_accelerator +class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_diffusion_inpaint_pipeline(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/sd2-inpaint/init_image.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint/mask.png" + ) + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint" + "/yellow_cat_sitting_on_a_park_bench.npy" + ) + + model_id = "stabilityai/stable-diffusion-2-inpainting" + pipe = StableDiffusionInpaintPipeline.from_pretrained(model_id, safety_checker=None) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + prompt = "Face of a yellow cat, high resolution, sitting on a park bench" + + generator = torch.manual_seed(0) + output = pipe( + prompt=prompt, + image=init_image, + mask_image=mask_image, + generator=generator, + output_type="np", + ) + image = output.images[0] + + assert image.shape == (512, 512, 3) + assert np.abs(expected_image - image).max() < 9e-3 + + def test_stable_diffusion_inpaint_pipeline_fp16(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/sd2-inpaint/init_image.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint/mask.png" + ) + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint" + "/yellow_cat_sitting_on_a_park_bench_fp16.npy" + ) + + model_id = "stabilityai/stable-diffusion-2-inpainting" + pipe = StableDiffusionInpaintPipeline.from_pretrained( + model_id, + torch_dtype=torch.float16, + safety_checker=None, + ) + pipe.to(torch_device) + 
pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + prompt = "Face of a yellow cat, high resolution, sitting on a park bench" + + generator = torch.manual_seed(0) + output = pipe( + prompt=prompt, + image=init_image, + mask_image=mask_image, + generator=generator, + output_type="np", + ) + image = output.images[0] + + assert image.shape == (512, 512, 3) + assert np.abs(expected_image - image).max() < 5e-1 + + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/sd2-inpaint/init_image.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint/mask.png" + ) + + model_id = "stabilityai/stable-diffusion-2-inpainting" + pndm = PNDMScheduler.from_pretrained(model_id, subfolder="scheduler") + pipe = StableDiffusionInpaintPipeline.from_pretrained( + model_id, + safety_checker=None, + scheduler=pndm, + torch_dtype=torch.float16, + ) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + pipe.enable_sequential_cpu_offload(device=torch_device) + + prompt = "Face of a yellow cat, high resolution, sitting on a park bench" + + generator = torch.manual_seed(0) + _ = pipe( + prompt=prompt, + image=init_image, + mask_image=mask_image, + generator=generator, + num_inference_steps=2, + output_type="np", + ) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 2.65 GB is allocated + assert mem_bytes < 2.65 * 10**9 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py new file mode 100644 index 0000000000000000000000000000000000000000..2e4b428dfeb53493da88410570c0797f03fed126 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py @@ -0,0 +1,356 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import gc +import random +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +import diffusers +from diffusers import ( + AutoencoderKL, + EulerDiscreteScheduler, + StableDiffusionLatentUpscalePipeline, + StableDiffusionPipeline, + UNet2DConditionModel, +) +from diffusers.schedulers import KarrasDiffusionSchedulers + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + floats_tensor, + load_image, + load_numpy, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS +from ..test_pipelines_common import PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin + + +enable_full_determinism() + + +def check_same_shape(tensor_list): + shapes = [tensor.shape for tensor in tensor_list] + return all(shape == shapes[0] for shape in shapes[1:]) + + +class StableDiffusionLatentUpscalePipelineFastTests( + PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableDiffusionLatentUpscalePipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - { + "height", + "width", + "cross_attention_kwargs", + "negative_prompt_embeds", + "prompt_embeds", + } + required_optional_params = PipelineTesterMixin.required_optional_params - {"num_images_per_prompt"} + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + image_params = frozenset( + [] + ) # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess + image_latents_params = frozenset([]) + + @property + def dummy_image(self): + batch_size = 1 + num_channels = 4 + sizes = (16, 16) + + image = floats_tensor((batch_size, num_channels) + sizes, rng=random.Random(0)).to(torch_device) + return image + + def get_dummy_components(self): + torch.manual_seed(0) + model = UNet2DConditionModel( + act_fn="gelu", + attention_head_dim=8, + norm_num_groups=None, + block_out_channels=[32, 32, 64, 64], + time_cond_proj_dim=160, + conv_in_kernel=1, + conv_out_kernel=1, + cross_attention_dim=32, + down_block_types=( + "KDownBlock2D", + "KCrossAttnDownBlock2D", + "KCrossAttnDownBlock2D", + "KCrossAttnDownBlock2D", + ), + in_channels=8, + mid_block_type=None, + only_cross_attention=False, + out_channels=5, + resnet_time_scale_shift="scale_shift", + time_embedding_type="fourier", + timestep_post_act="gelu", + up_block_types=("KCrossAttnUpBlock2D", "KCrossAttnUpBlock2D", "KCrossAttnUpBlock2D", "KUpBlock2D"), + ) + vae = AutoencoderKL( + block_out_channels=[32, 32, 64, 64], + in_channels=3, + out_channels=3, + down_block_types=[ + "DownEncoderBlock2D", + "DownEncoderBlock2D", + "DownEncoderBlock2D", + "DownEncoderBlock2D", + ], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + scheduler = EulerDiscreteScheduler(prediction_type="sample") + text_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + hidden_act="quick_gelu", + projection_dim=512, + ) + text_encoder = CLIPTextModel(text_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": model.eval(), + "vae": vae.eval(), + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": 
tokenizer, + } + + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": self.dummy_image.cpu(), + "generator": generator, + "num_inference_steps": 2, + "output_type": "np", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + self.assertEqual(image.shape, (1, 256, 256, 3)) + expected_slice = np.array( + [0.47222412, 0.41921633, 0.44717434, 0.46874192, 0.42588258, 0.46150726, 0.4677534, 0.45583832, 0.48579055] + ) + max_diff = np.abs(image_slice.flatten() - expected_slice).max() + self.assertLessEqual(max_diff, 1e-3) + + def test_stable_diffusion_latent_upscaler_negative_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionLatentUpscalePipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + negative_prompt = "french fries" + output = sd_pipe(**inputs, negative_prompt=negative_prompt) + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 256, 256, 3) + expected_slice = np.array( + [0.43865365, 0.404124, 0.42618454, 0.44333526, 0.40564927, 0.43818694, 0.4411913, 0.43404633, 0.46392226] + ) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_latent_upscaler_multiple_init_images(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionLatentUpscalePipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["prompt"] = [inputs["prompt"]] * 2 + inputs["image"] = inputs["image"].repeat(2, 1, 1, 1) + image = sd_pipe(**inputs).images + image_slice = image[-1, -3:, -3:, -1] + + assert image.shape == (2, 256, 256, 3) + expected_slice = np.array( + [0.38730142, 0.35695046, 0.40646142, 0.40967226, 0.3981609, 0.4195988, 0.4248805, 0.430259, 0.45694894] + ) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_attention_slicing_forward_pass(self): + super().test_attention_slicing_forward_pass(expected_max_diff=7e-3) + + def test_sequential_cpu_offload_forward_pass(self): + super().test_sequential_cpu_offload_forward_pass(expected_max_diff=3e-3) + + def test_dict_tuple_outputs_equivalent(self): + super().test_dict_tuple_outputs_equivalent(expected_max_difference=3e-3) + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=7e-3) + + def test_pt_np_pil_outputs_equivalent(self): + super().test_pt_np_pil_outputs_equivalent(expected_max_diff=3e-3) + + def test_save_load_local(self): + super().test_save_load_local(expected_max_difference=3e-3) + + def test_save_load_optional_components(self): + super().test_save_load_optional_components(expected_max_difference=3e-3) + + def test_karras_schedulers_shape(self): + skip_schedulers = [ + 
"DDIMScheduler", + "DDPMScheduler", + "PNDMScheduler", + "HeunDiscreteScheduler", + "EulerAncestralDiscreteScheduler", + "KDPM2DiscreteScheduler", + "KDPM2AncestralDiscreteScheduler", + "DPMSolverSDEScheduler", + "EDMEulerScheduler", + ] + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + + # make sure that PNDM does not need warm-up + pipe.scheduler.register_to_config(skip_prk_steps=True) + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = 2 + + outputs = [] + for scheduler_enum in KarrasDiffusionSchedulers: + if scheduler_enum.name in skip_schedulers: + # no sigma schedulers are not supported + # no schedulers + continue + + scheduler_cls = getattr(diffusers, scheduler_enum.name) + pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config) + output = pipe(**inputs)[0] + outputs.append(output) + + assert check_same_shape(outputs) + + def test_float16_inference(self): + super().test_float16_inference(expected_max_diff=5e-1) + + @unittest.skip("Test not supported for a weird use of `text_input_ids`.") + def test_encode_prompt_works_in_isolation(self): + pass + + +@require_torch_accelerator +@slow +class StableDiffusionLatentUpscalePipelineIntegrationTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_latent_upscaler_fp16(self): + generator = torch.manual_seed(33) + + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16) + pipe.to(torch_device) + + upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained( + "stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16 + ) + upscaler.to(torch_device) + + prompt = "a photo of an astronaut high resolution, unreal engine, ultra realistic" + + low_res_latents = pipe(prompt, generator=generator, output_type="latent").images + + image = upscaler( + prompt=prompt, + image=low_res_latents, + num_inference_steps=20, + guidance_scale=0, + generator=generator, + output_type="np", + ).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/latent-upscaler/astronaut_1024.npy" + ) + assert np.abs((expected_image - image).mean()) < 5e-2 + + def test_latent_upscaler_fp16_image(self): + generator = torch.manual_seed(33) + + upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained( + "stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16 + ) + upscaler.to(torch_device) + + prompt = "the temple of fire by Ross Tran and Gerardo Dottori, oil on canvas" + + low_res_img = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/latent-upscaler/fire_temple_512.png" + ) + + image = upscaler( + prompt=prompt, + image=low_res_img, + num_inference_steps=20, + guidance_scale=0, + generator=generator, + output_type="np", + ).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/latent-upscaler/fire_temple_1024.npy" + ) + assert np.abs((expected_image - image).max()) < 5e-2 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py new file mode 100644 index 
0000000000000000000000000000000000000000..481ac7f2d10f7e24215c709e9e9e2d0cd7d50186 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py @@ -0,0 +1,498 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import random +import tempfile +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import AutoencoderKL, DDIMScheduler, DDPMScheduler, StableDiffusionUpscalePipeline, UNet2DConditionModel + +from ...testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + floats_tensor, + load_image, + load_numpy, + require_accelerator, + require_torch_accelerator, + slow, + torch_device, +) + + +enable_full_determinism() + + +class StableDiffusionUpscalePipelineFastTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + @property + def dummy_image(self): + batch_size = 1 + num_channels = 3 + sizes = (32, 32) + + image = floats_tensor((batch_size, num_channels) + sizes, rng=random.Random(0)).to(torch_device) + return image + + @property + def dummy_cond_unet_upscale(self): + torch.manual_seed(0) + model = UNet2DConditionModel( + block_out_channels=(32, 32, 64), + layers_per_block=2, + sample_size=32, + in_channels=7, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + # SD2-specific config below + attention_head_dim=8, + use_linear_projection=True, + only_cross_attention=(True, True, False), + num_class_embeds=100, + ) + return model + + @property + def dummy_vae(self): + torch.manual_seed(0) + model = AutoencoderKL( + block_out_channels=[32, 32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + return model + + @property + def dummy_text_encoder(self): + torch.manual_seed(0) + config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=512, + ) + return CLIPTextModel(config) + + def test_stable_diffusion_upscale(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + unet = self.dummy_cond_unet_upscale + low_res_scheduler = DDPMScheduler() + 
scheduler = DDIMScheduler(prediction_type="v_prediction") + vae = self.dummy_vae + text_encoder = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + image = self.dummy_image.cpu().permute(0, 2, 3, 1)[0] + low_res_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionUpscalePipeline( + unet=unet, + low_res_scheduler=low_res_scheduler, + scheduler=scheduler, + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + max_noise_level=350, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + generator = torch.Generator(device=device).manual_seed(0) + output = sd_pipe( + [prompt], + image=low_res_image, + generator=generator, + guidance_scale=6.0, + noise_level=20, + num_inference_steps=2, + output_type="np", + ) + + image = output.images + + generator = torch.Generator(device=device).manual_seed(0) + image_from_tuple = sd_pipe( + [prompt], + image=low_res_image, + generator=generator, + guidance_scale=6.0, + noise_level=20, + num_inference_steps=2, + output_type="np", + return_dict=False, + )[0] + + image_slice = image[0, -3:, -3:, -1] + image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] + + expected_height_width = low_res_image.size[0] * 4 + assert image.shape == (1, expected_height_width, expected_height_width, 3) + expected_slice = np.array([0.3113, 0.3910, 0.4272, 0.4859, 0.5061, 0.4652, 0.5362, 0.5715, 0.5661]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_upscale_batch(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + unet = self.dummy_cond_unet_upscale + low_res_scheduler = DDPMScheduler() + scheduler = DDIMScheduler(prediction_type="v_prediction") + vae = self.dummy_vae + text_encoder = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + image = self.dummy_image.cpu().permute(0, 2, 3, 1)[0] + low_res_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionUpscalePipeline( + unet=unet, + low_res_scheduler=low_res_scheduler, + scheduler=scheduler, + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + max_noise_level=350, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + output = sd_pipe( + 2 * [prompt], + image=2 * [low_res_image], + guidance_scale=6.0, + noise_level=20, + num_inference_steps=2, + output_type="np", + ) + image = output.images + assert image.shape[0] == 2 + + generator = torch.Generator(device=device).manual_seed(0) + output = sd_pipe( + [prompt], + image=low_res_image, + generator=generator, + num_images_per_prompt=2, + guidance_scale=6.0, + noise_level=20, + num_inference_steps=2, + output_type="np", + ) + image = output.images + assert image.shape[0] == 2 + + def test_stable_diffusion_upscale_prompt_embeds(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + unet = self.dummy_cond_unet_upscale + low_res_scheduler = DDPMScheduler() + scheduler = DDIMScheduler(prediction_type="v_prediction") + vae = self.dummy_vae + text_encoder = self.dummy_text_encoder + 
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + image = self.dummy_image.cpu().permute(0, 2, 3, 1)[0] + low_res_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionUpscalePipeline( + unet=unet, + low_res_scheduler=low_res_scheduler, + scheduler=scheduler, + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + max_noise_level=350, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + generator = torch.Generator(device=device).manual_seed(0) + output = sd_pipe( + [prompt], + image=low_res_image, + generator=generator, + guidance_scale=6.0, + noise_level=20, + num_inference_steps=2, + output_type="np", + ) + + image = output.images + + generator = torch.Generator(device=device).manual_seed(0) + prompt_embeds, negative_prompt_embeds = sd_pipe.encode_prompt(prompt, device, 1, False) + if negative_prompt_embeds is not None: + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) + + image_from_prompt_embeds = sd_pipe( + prompt_embeds=prompt_embeds, + image=[low_res_image], + generator=generator, + guidance_scale=6.0, + noise_level=20, + num_inference_steps=2, + output_type="np", + return_dict=False, + )[0] + + image_slice = image[0, -3:, -3:, -1] + image_from_prompt_embeds_slice = image_from_prompt_embeds[0, -3:, -3:, -1] + + expected_height_width = low_res_image.size[0] * 4 + assert image.shape == (1, expected_height_width, expected_height_width, 3) + expected_slice = np.array([0.3113, 0.3910, 0.4272, 0.4859, 0.5061, 0.4652, 0.5362, 0.5715, 0.5661]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_from_prompt_embeds_slice.flatten() - expected_slice).max() < 1e-2 + + @require_accelerator + def test_stable_diffusion_upscale_fp16(self): + """Test that stable diffusion upscale works with fp16""" + unet = self.dummy_cond_unet_upscale + low_res_scheduler = DDPMScheduler() + scheduler = DDIMScheduler(prediction_type="v_prediction") + vae = self.dummy_vae + text_encoder = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + image = self.dummy_image.cpu().permute(0, 2, 3, 1)[0] + low_res_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + + # put models in fp16, except vae as it overflows in fp16 + unet = unet.half() + text_encoder = text_encoder.half() + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionUpscalePipeline( + unet=unet, + low_res_scheduler=low_res_scheduler, + scheduler=scheduler, + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + max_noise_level=350, + ) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + generator = torch.manual_seed(0) + image = sd_pipe( + [prompt], + image=low_res_image, + generator=generator, + num_inference_steps=2, + output_type="np", + ).images + + expected_height_width = low_res_image.size[0] * 4 + assert image.shape == (1, expected_height_width, expected_height_width, 3) + + def test_stable_diffusion_upscale_from_save_pretrained(self): + pipes = [] + + device = "cpu" # ensure determinism for the device-dependent torch.Generator + low_res_scheduler = DDPMScheduler() + scheduler = DDIMScheduler(prediction_type="v_prediction") + tokenizer = 
CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionUpscalePipeline( + unet=self.dummy_cond_unet_upscale, + low_res_scheduler=low_res_scheduler, + scheduler=scheduler, + vae=self.dummy_vae, + text_encoder=self.dummy_text_encoder, + tokenizer=tokenizer, + max_noise_level=350, + ) + sd_pipe = sd_pipe.to(device) + pipes.append(sd_pipe) + + with tempfile.TemporaryDirectory() as tmpdirname: + sd_pipe.save_pretrained(tmpdirname) + sd_pipe = StableDiffusionUpscalePipeline.from_pretrained(tmpdirname).to(device) + pipes.append(sd_pipe) + + prompt = "A painting of a squirrel eating a burger" + image = self.dummy_image.cpu().permute(0, 2, 3, 1)[0] + low_res_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + + image_slices = [] + for pipe in pipes: + generator = torch.Generator(device=device).manual_seed(0) + image = pipe( + [prompt], + image=low_res_image, + generator=generator, + guidance_scale=6.0, + noise_level=20, + num_inference_steps=2, + output_type="np", + ).images + image_slices.append(image[0, -3:, -3:, -1].flatten()) + + assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + + +@slow +@require_torch_accelerator +class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_diffusion_upscale_pipeline(self): + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/sd2-upscale/low_res_cat.png" + ) + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale" + "/upsampled_cat.npy" + ) + + model_id = "stabilityai/stable-diffusion-x4-upscaler" + pipe = StableDiffusionUpscalePipeline.from_pretrained(model_id) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + prompt = "a cat sitting on a park bench" + + generator = torch.manual_seed(0) + output = pipe( + prompt=prompt, + image=image, + generator=generator, + output_type="np", + ) + image = output.images[0] + + assert image.shape == (512, 512, 3) + assert np.abs(expected_image - image).max() < 1e-3 + + def test_stable_diffusion_upscale_pipeline_fp16(self): + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/sd2-upscale/low_res_cat.png" + ) + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale" + "/upsampled_cat_fp16.npy" + ) + + model_id = "stabilityai/stable-diffusion-x4-upscaler" + pipe = StableDiffusionUpscalePipeline.from_pretrained( + model_id, + torch_dtype=torch.float16, + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + prompt = "a cat sitting on a park bench" + + generator = torch.manual_seed(0) + output = pipe( + prompt=prompt, + image=image, + generator=generator, + output_type="np", + ) + image = output.images[0] + + assert image.shape == (512, 512, 3) + assert np.abs(expected_image - image).max() < 5e-1 + + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): + backend_empty_cache(torch_device) + 
backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/sd2-upscale/low_res_cat.png" + ) + + model_id = "stabilityai/stable-diffusion-x4-upscaler" + pipe = StableDiffusionUpscalePipeline.from_pretrained( + model_id, + torch_dtype=torch.float16, + ) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + pipe.enable_sequential_cpu_offload(device=torch_device) + + prompt = "a cat sitting on a park bench" + + generator = torch.manual_seed(0) + _ = pipe( + prompt=prompt, + image=image, + generator=generator, + num_inference_steps=5, + output_type="np", + ) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 2.9 GB is allocated + assert mem_bytes < 2.9 * 10**9 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py new file mode 100644 index 0000000000000000000000000000000000000000..37b309c4cac4ce8ed9bfb3a7613ccdff776ba0f7 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py @@ -0,0 +1,554 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
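The v-prediction tests in this new file exercise `prediction_type="v_prediction"`, where the network is trained to predict v = alpha_t * eps - sigma_t * x0 (Salimans & Ho, 2022) instead of the noise eps, with alpha_t = sqrt(alphas_cumprod[t]) and sigma_t = sqrt(1 - alphas_cumprod[t]). A minimal sketch of the reparameterization and its inverse, assuming a precomputed `alphas_cumprod` tensor (helper names are illustrative, not part of this patch or of the diffusers API):

import torch

def v_target(eps: torch.Tensor, x0: torch.Tensor, alphas_cumprod: torch.Tensor, t: int) -> torch.Tensor:
    # v-parameterization training target: v = alpha_t * eps - sigma_t * x0
    alpha_t = alphas_cumprod[t] ** 0.5
    sigma_t = (1.0 - alphas_cumprod[t]) ** 0.5
    return alpha_t * eps - sigma_t * x0

def x0_from_v(v: torch.Tensor, x_t: torch.Tensor, alphas_cumprod: torch.Tensor, t: int) -> torch.Tensor:
    # inverse: since x_t = alpha_t * x0 + sigma_t * eps and alpha_t**2 + sigma_t**2 == 1,
    # alpha_t * x_t - sigma_t * v recovers x0 exactly
    alpha_t = alphas_cumprod[t] ** 0.5
    sigma_t = (1.0 - alphas_cumprod[t]) ** 0.5
    return alpha_t * x_t - sigma_t * v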
+ +import gc +import time +import unittest + +import numpy as np +import torch +from huggingface_hub import hf_hub_download +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerDiscreteScheduler, + StableDiffusionPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + load_numpy, + numpy_cosine_similarity_distance, + require_accelerator, + require_torch_accelerator, + slow, + torch_device, +) + + +enable_full_determinism() + + +class StableDiffusion2VPredictionPipelineFastTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + @property + def dummy_cond_unet(self): + torch.manual_seed(0) + model = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + ) + return model + + @property + def dummy_vae(self): + torch.manual_seed(0) + model = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + return model + + @property + def dummy_text_encoder(self): + torch.manual_seed(0) + config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=64, + ) + return CLIPTextModel(config) + + def test_stable_diffusion_v_pred_ddim(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + unet = self.dummy_cond_unet + scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + prediction_type="v_prediction", + ) + + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=None, + feature_extractor=None, + image_encoder=None, + requires_safety_checker=False, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + + generator = torch.Generator(device=device).manual_seed(0) + output = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np") + image = output.images + + generator = torch.Generator(device=device).manual_seed(0) + image_from_tuple = sd_pipe( + [prompt], + generator=generator, + guidance_scale=6.0, + num_inference_steps=2, + output_type="np", + return_dict=False, + 
)[0] + + image_slice = image[0, -3:, -3:, -1] + image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.6569, 0.6525, 0.5142, 0.4968, 0.4923, 0.4601, 0.4996, 0.5041, 0.4544]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_v_pred_k_euler(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + unet = self.dummy_cond_unet + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", prediction_type="v_prediction" + ) + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=None, + feature_extractor=None, + image_encoder=None, + requires_safety_checker=False, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + generator = torch.Generator(device=device).manual_seed(0) + output = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np") + + image = output.images + + generator = torch.Generator(device=device).manual_seed(0) + image_from_tuple = sd_pipe( + [prompt], + generator=generator, + guidance_scale=6.0, + num_inference_steps=2, + output_type="np", + return_dict=False, + )[0] + + image_slice = image[0, -3:, -3:, -1] + image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5644, 0.6514, 0.5190, 0.5663, 0.5287, 0.4953, 0.5430, 0.5243, 0.4778]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + + @require_accelerator + def test_stable_diffusion_v_pred_fp16(self): + """Test that stable diffusion v-prediction works with fp16""" + unet = self.dummy_cond_unet + scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + prediction_type="v_prediction", + ) + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + # put models in fp16 + unet = unet.half() + vae = vae.half() + bert = bert.half() + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=None, + feature_extractor=None, + image_encoder=None, + requires_safety_checker=False, + ) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + generator = torch.manual_seed(0) + image = sd_pipe([prompt], generator=generator, num_inference_steps=2, output_type="np").images + + assert image.shape == (1, 64, 64, 3) + + +@slow +@require_torch_accelerator +class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM 
after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_diffusion_v_pred_default(self): + sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2") + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.enable_attention_slicing() + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + generator = torch.manual_seed(0) + output = sd_pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=20, output_type="np") + + image = output.images + image_slice = image[0, 253:256, 253:256, -1] + + assert image.shape == (1, 768, 768, 3) + expected_slice = np.array([0.1868, 0.1922, 0.1527, 0.1921, 0.1908, 0.1624, 0.1779, 0.1652, 0.1734]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_v_pred_upcast_attention(self): + sd_pipe = StableDiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16 + ) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.enable_attention_slicing() + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + generator = torch.manual_seed(0) + output = sd_pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=20, output_type="np") + + image = output.images + image_slice = image[0, 253:256, 253:256, -1] + + assert image.shape == (1, 768, 768, 3) + expected_slice = np.array([0.4209, 0.4087, 0.4097, 0.4209, 0.3860, 0.4329, 0.4280, 0.4324, 0.4187]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-2 + + def test_stable_diffusion_v_pred_euler(self): + scheduler = EulerDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-2", subfolder="scheduler") + sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", scheduler=scheduler) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.enable_attention_slicing() + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + generator = torch.manual_seed(0) + + output = sd_pipe([prompt], generator=generator, num_inference_steps=5, output_type="np") + image = output.images + + image_slice = image[0, 253:256, 253:256, -1] + + assert image.shape == (1, 768, 768, 3) + expected_slice = np.array([0.1781, 0.1695, 0.1661, 0.1705, 0.1588, 0.1699, 0.2005, 0.1589, 0.1677]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_v_pred_dpm(self): + """ + TODO: update this test after making DPM compatible with V-prediction! 
+ """ + scheduler = DPMSolverMultistepScheduler.from_pretrained( + "stabilityai/stable-diffusion-2", + subfolder="scheduler", + final_sigmas_type="sigma_min", + ) + sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", scheduler=scheduler) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.enable_attention_slicing() + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "a photograph of an astronaut riding a horse" + generator = torch.manual_seed(0) + image = sd_pipe( + [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=5, output_type="np" + ).images + + image_slice = image[0, 253:256, 253:256, -1] + assert image.shape == (1, 768, 768, 3) + expected_slice = np.array([0.3303, 0.3184, 0.3291, 0.3300, 0.3256, 0.3113, 0.2965, 0.3134, 0.3192]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_attention_slicing_v_pred(self): + backend_reset_peak_memory_stats(torch_device) + model_id = "stabilityai/stable-diffusion-2" + pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + prompt = "a photograph of an astronaut riding a horse" + + # make attention efficient + pipe.enable_attention_slicing() + generator = torch.manual_seed(0) + output_chunked = pipe( + [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="np" + ) + image_chunked = output_chunked.images + + mem_bytes = backend_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + # make sure that less than 5.5 GB is allocated + assert mem_bytes < 5.5 * 10**9 + + # disable slicing + pipe.disable_attention_slicing() + generator = torch.manual_seed(0) + output = pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="np") + image = output.images + + # make sure that more than 3.0 GB is allocated + mem_bytes = backend_max_memory_allocated(torch_device) + assert mem_bytes > 3 * 10**9 + max_diff = numpy_cosine_similarity_distance(image.flatten(), image_chunked.flatten()) + assert max_diff < 1e-3 + + def test_stable_diffusion_text2img_pipeline_v_pred_default(self): + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/" + "sd2-text2img/astronaut_riding_a_horse_v_pred.npy" + ) + + pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2") + pipe.to(torch_device) + pipe.enable_attention_slicing() + pipe.set_progress_bar_config(disable=None) + + prompt = "astronaut riding a horse" + + generator = torch.manual_seed(0) + output = pipe(prompt=prompt, guidance_scale=7.5, generator=generator, output_type="np") + image = output.images[0] + + assert image.shape == (768, 768, 3) + max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten()) + assert max_diff < 1e-3 + + def test_stable_diffusion_text2img_pipeline_unflawed(self): + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/" + "sd2-text2img/lion_galaxy.npy" + ) + + pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1") + pipe.scheduler = DDIMScheduler.from_config( + pipe.scheduler.config, timestep_spacing="trailing", rescale_betas_zero_snr=True + ) + pipe.enable_model_cpu_offload(device=torch_device) + pipe.set_progress_bar_config(disable=None) + + prompt = "A lion in galaxies, spirals, nebulae, stars, smoke, 
iridescent, intricate detail, octane render, 8k" + + generator = torch.Generator("cpu").manual_seed(0) + output = pipe( + prompt=prompt, + guidance_scale=7.5, + num_inference_steps=10, + guidance_rescale=0.7, + generator=generator, + output_type="np", + ) + image = output.images[0] + + assert image.shape == (768, 768, 3) + max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten()) + assert max_diff < 5e-2 + + def test_stable_diffusion_text2img_pipeline_v_pred_fp16(self): + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/" + "sd2-text2img/astronaut_riding_a_horse_v_pred_fp16.npy" + ) + + pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", torch_dtype=torch.float16) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + prompt = "astronaut riding a horse" + + generator = torch.manual_seed(0) + output = pipe(prompt=prompt, guidance_scale=7.5, generator=generator, output_type="np") + image = output.images[0] + + assert image.shape == (768, 768, 3) + max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten()) + assert max_diff < 1e-3 + + def test_download_local(self): + filename = hf_hub_download("stabilityai/stable-diffusion-2-1", filename="v2-1_768-ema-pruned.safetensors") + + pipe = StableDiffusionPipeline.from_single_file(filename, torch_dtype=torch.float16) + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.enable_model_cpu_offload(device=torch_device) + + image_out = pipe("test", num_inference_steps=1, output_type="np").images[0] + + assert image_out.shape == (768, 768, 3) + + def test_stable_diffusion_text2img_intermediate_state_v_pred(self): + number_of_steps = 0 + + def test_callback_fn(step: int, timestep: int, latents: torch.Tensor) -> None: + test_callback_fn.has_been_called = True + nonlocal number_of_steps + number_of_steps += 1 + if step == 0: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 96, 96) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([0.7749, 0.0325, 0.5088, 0.1619, 0.3372, 0.3667, -0.5186, 0.6860, 1.4326]) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2 + elif step == 19: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 96, 96) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([1.3887, 1.0273, 1.7266, 0.0726, 0.6611, 0.1598, -1.0547, 0.1522, 0.0227]) + + assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2 + + test_callback_fn.has_been_called = False + + pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", torch_dtype=torch.float16) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + prompt = "Andromeda galaxy in a bottle" + + generator = torch.manual_seed(0) + pipe( + prompt=prompt, + num_inference_steps=20, + guidance_scale=7.5, + generator=generator, + callback=test_callback_fn, + callback_steps=1, + ) + assert test_callback_fn.has_been_called + assert number_of_steps == 20 + + def test_stable_diffusion_low_cpu_mem_usage_v_pred(self): + pipeline_id = "stabilityai/stable-diffusion-2" + + start_time = time.time() + pipeline_low_cpu_mem_usage = StableDiffusionPipeline.from_pretrained(pipeline_id, torch_dtype=torch.float16) + pipeline_low_cpu_mem_usage.to(torch_device) + low_cpu_mem_usage_time = time.time() - start_time + + start_time = time.time() + _ = 
StableDiffusionPipeline.from_pretrained(pipeline_id, torch_dtype=torch.float16, low_cpu_mem_usage=False) + normal_load_time = time.time() - start_time + + assert 2 * low_cpu_mem_usage_time < normal_load_time + + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading_v_pred(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipeline_id = "stabilityai/stable-diffusion-2" + prompt = "Andromeda galaxy in a bottle" + + pipeline = StableDiffusionPipeline.from_pretrained(pipeline_id, torch_dtype=torch.float16) + pipeline.enable_attention_slicing(1) + pipeline.enable_sequential_cpu_offload(device=torch_device) + + generator = torch.manual_seed(0) + _ = pipeline(prompt, generator=generator, num_inference_steps=5) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 2.8 GB is allocated + assert mem_bytes < 2.8 * 10**9 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py new file mode 100644 index 0000000000000000000000000000000000000000..3ccefe3de35d9abe9ef1ae2de04646e250d2c4de --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py @@ -0,0 +1,262 @@ +import gc +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel + +from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline + +from ...testing_utils import ( + backend_empty_cache, + numpy_cosine_similarity_distance, + require_big_accelerator, + slow, + torch_device, +) +from ..test_pipelines_common import ( + PipelineTesterMixin, + check_qkv_fusion_matches_attn_procs_length, + check_qkv_fusion_processors_exist, +) + + +class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin): + pipeline_class = StableDiffusion3Pipeline + params = frozenset( + [ + "prompt", + "height", + "width", + "guidance_scale", + "negative_prompt", + "prompt_embeds", + "negative_prompt_embeds", + ] + ) + batch_params = frozenset(["prompt", "negative_prompt"]) + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = SD3Transformer2DModel( + sample_size=32, + patch_size=1, + in_channels=4, + num_layers=1, + attention_head_dim=8, + num_attention_heads=4, + caption_projection_dim=32, + joint_attention_dim=32, + pooled_projection_dim=64, + out_channels=4, + ) + clip_text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + hidden_act="gelu", + projection_dim=32, + ) + + torch.manual_seed(0) + text_encoder = CLIPTextModelWithProjection(clip_text_encoder_config) + + torch.manual_seed(0) + text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) + + text_encoder_3 = 
T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_3 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + vae = AutoencoderKL( + sample_size=32, + in_channels=3, + out_channels=3, + block_out_channels=(4,), + layers_per_block=1, + latent_channels=4, + norm_num_groups=1, + use_quant_conv=False, + use_post_quant_conv=False, + shift_factor=0.0609, + scaling_factor=1.5035, + ) + + scheduler = FlowMatchEulerDiscreteScheduler() + + return { + "scheduler": scheduler, + "text_encoder": text_encoder, + "text_encoder_2": text_encoder_2, + "text_encoder_3": text_encoder_3, + "tokenizer": tokenizer, + "tokenizer_2": tokenizer_2, + "tokenizer_3": tokenizer_3, + "transformer": transformer, + "vae": vae, + "image_encoder": None, + "feature_extractor": None, + } + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device="cpu").manual_seed(seed) + + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + } + return inputs + + def test_inference(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + + inputs = self.get_dummy_inputs(torch_device) + image = pipe(**inputs).images[0] + generated_slice = image.flatten() + generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]]) + + # fmt: off + expected_slice = np.array([0.5112, 0.5228, 0.5235, 0.5524, 0.3188, 0.5017, 0.5574, 0.4899, 0.6812, 0.5991, 0.3908, 0.5213, 0.5582, 0.4457, 0.4204, 0.5616]) + # fmt: on + + self.assertTrue( + np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice." + ) + + def test_fused_qkv_projections(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + original_image_slice = image[0, -3:, -3:, -1] + + # TODO (sayakpaul): will refactor this once `fuse_qkv_projections()` has been added + # to the pipeline level. + pipe.transformer.fuse_qkv_projections() + assert check_qkv_fusion_processors_exist(pipe.transformer), ( + "Something wrong with the fused attention processors. Expected all the attention processors to be fused." + ) + assert check_qkv_fusion_matches_attn_procs_length( + pipe.transformer, pipe.transformer.original_attn_processors + ), "Something wrong with the attention processors concerning the fused QKV projections." + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice_fused = image[0, -3:, -3:, -1] + + pipe.transformer.unfuse_qkv_projections() + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice_disabled = image[0, -3:, -3:, -1] + + assert np.allclose(original_image_slice, image_slice_fused, atol=1e-3, rtol=1e-3), ( + "Fusion of QKV projections shouldn't affect the outputs." 
+ ) + assert np.allclose(image_slice_fused, image_slice_disabled, atol=1e-3, rtol=1e-3), ( + "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled." + ) + assert np.allclose(original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2), ( + "Original outputs should match when fused QKV projections are disabled." + ) + + def test_skip_guidance_layers(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + output_full = pipe(**inputs)[0] + + inputs_with_skip = inputs.copy() + inputs_with_skip["skip_guidance_layers"] = [0] + output_skip = pipe(**inputs_with_skip)[0] + + self.assertFalse( + np.allclose(output_full, output_skip, atol=1e-5), "Outputs should differ when layers are skipped" + ) + + self.assertEqual(output_full.shape, output_skip.shape, "Outputs should have the same shape") + + inputs["num_images_per_prompt"] = 2 + output_full = pipe(**inputs)[0] + + inputs_with_skip = inputs.copy() + inputs_with_skip["skip_guidance_layers"] = [0] + output_skip = pipe(**inputs_with_skip)[0] + + self.assertFalse( + np.allclose(output_full, output_skip, atol=1e-5), "Outputs should differ when layers are skipped" + ) + + self.assertEqual(output_full.shape, output_skip.shape, "Outputs should have the same shape") + + +@slow +@require_big_accelerator +class StableDiffusion3PipelineSlowTests(unittest.TestCase): + pipeline_class = StableDiffusion3Pipeline + repo_id = "stabilityai/stable-diffusion-3-medium-diffusers" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device="cpu").manual_seed(seed) + + return { + "prompt": "A photo of a cat", + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + "generator": generator, + } + + def test_sd3_inference(self): + pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16) + pipe.enable_model_cpu_offload(device=torch_device) + + inputs = self.get_inputs(torch_device) + + image = pipe(**inputs).images[0] + image_slice = image[0, :10, :10] + # fmt: off + expected_slice = np.array([0.4648, 0.4404, 0.4177, 0.5063, 0.4800, 0.4287, 0.5425, 0.5190, 0.4717, 0.5430, 0.5195, 0.4766, 0.5361, 0.5122, 0.4612, 0.4871, 0.4749, 0.4058, 0.4756, 0.4678, 0.3804, 0.4832, 0.4822, 0.3799, 0.5103, 0.5034, 0.3953, 0.5073, 0.4839, 0.3884]) + # fmt: on + + max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten()) + + assert max_diff < 1e-4 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..9025b1060c9e2b063df84cb9fb719a90aeafa960 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py @@ -0,0 +1,210 @@ +import gc +import random +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel 
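The img2img fast tests below pass `strength=0.8` together with `num_inference_steps`; in diffusers img2img pipelines, `strength` determines how far the init image is noised and therefore how many of the scheduler's final steps actually run, roughly `int(num_inference_steps * strength)`. A simplified sketch of that timestep selection, modeled on the pipelines' `get_timesteps` logic (not a verbatim copy of it):

def effective_timesteps(timesteps, num_inference_steps: int, strength: float):
    # strength=1.0 re-noises the init image completely and runs the full schedule;
    # a small strength keeps the output close to the init image by running only the tail
    init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
    t_start = max(num_inference_steps - init_timestep, 0)
    return timesteps[t_start:], num_inference_steps - t_start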
+ +from diffusers import ( + AutoencoderKL, + FlowMatchEulerDiscreteScheduler, + SD3Transformer2DModel, + StableDiffusion3Img2ImgPipeline, +) +from diffusers.utils import load_image + +from ...testing_utils import ( + Expectations, + backend_empty_cache, + floats_tensor, + numpy_cosine_similarity_distance, + require_big_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, +) +from ..test_pipelines_common import PipelineLatentTesterMixin, PipelineTesterMixin + + +class StableDiffusion3Img2ImgPipelineFastTests(PipelineLatentTesterMixin, unittest.TestCase, PipelineTesterMixin): + pipeline_class = StableDiffusion3Img2ImgPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width"} + required_optional_params = PipelineTesterMixin.required_optional_params + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = SD3Transformer2DModel( + sample_size=32, + patch_size=1, + in_channels=4, + num_layers=1, + attention_head_dim=8, + num_attention_heads=4, + joint_attention_dim=32, + caption_projection_dim=32, + pooled_projection_dim=64, + out_channels=4, + ) + clip_text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + hidden_act="gelu", + projection_dim=32, + ) + + torch.manual_seed(0) + text_encoder = CLIPTextModelWithProjection(clip_text_encoder_config) + + torch.manual_seed(0) + text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) + + text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_3 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + vae = AutoencoderKL( + sample_size=32, + in_channels=3, + out_channels=3, + block_out_channels=(4,), + layers_per_block=1, + latent_channels=4, + norm_num_groups=1, + use_quant_conv=False, + use_post_quant_conv=False, + shift_factor=0.0609, + scaling_factor=1.5035, + ) + + scheduler = FlowMatchEulerDiscreteScheduler() + + return { + "scheduler": scheduler, + "text_encoder": text_encoder, + "text_encoder_2": text_encoder_2, + "text_encoder_3": text_encoder_3, + "tokenizer": tokenizer, + "tokenizer_2": tokenizer_2, + "tokenizer_3": tokenizer_3, + "transformer": transformer, + "vae": vae, + "image_encoder": None, + "feature_extractor": None, + } + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device="cpu").manual_seed(seed) + + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + "strength": 0.8, + } + return inputs + + def test_inference(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + + inputs = self.get_dummy_inputs(torch_device) + image = 
pipe(**inputs).images[0] + generated_slice = image.flatten() + generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]]) + + # fmt: off + expected_slice = np.array([0.4564, 0.5486, 0.4868, 0.5923, 0.3775, 0.5543, 0.4807, 0.4177, 0.3778, 0.5957, 0.5726, 0.4333, 0.6312, 0.5062, 0.4838, 0.5984]) + # fmt: on + + self.assertTrue( + np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice." + ) + + @unittest.skip("Skip for now.") + def test_multi_vae(self): + pass + + +@slow +@require_big_accelerator +class StableDiffusion3Img2ImgPipelineSlowTests(unittest.TestCase): + pipeline_class = StableDiffusion3Img2ImgPipeline + repo_id = "stabilityai/stable-diffusion-3-medium-diffusers" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, seed=0): + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/sketch-mountains-input.png" + ) + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device="cpu").manual_seed(seed) + + return { + "prompt": "A photo of a cat", + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + "generator": generator, + "image": init_image, + } + + def test_sd3_img2img_inference(self): + torch.manual_seed(0) + pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16) + pipe.enable_model_cpu_offload(device=torch_device) + inputs = self.get_inputs(torch_device) + image = pipe(**inputs).images[0] + image_slice = image[0, :10, :10] + + # fmt: off + expected_slices = Expectations( + { + ("xpu", 3): np.array([0.5117, 0.4421, 0.3852, 0.5044, 0.4219, 0.3262, 0.5024, 0.4329, 0.3276, 0.4978, 0.4412, 0.3355, 0.4983, 0.4338, 0.3279, 0.4893, 0.4241, 0.3129, 0.4875, 0.4253, 0.3030, 0.4961, 0.4267, 0.2988, 0.5029, 0.4255, 0.3054, 0.5132, 0.4248, 0.3222]), + ("cuda", 7): np.array([0.5435, 0.4673, 0.5732, 0.4438, 0.3557, 0.4912, 0.4331, 0.3491, 0.4915, 0.4287, 0.347, 0.4849, 0.4355, 0.3469, 0.4871, 0.4431, 0.3538, 0.4912, 0.4521, 0.3643, 0.5059, 0.4587, 0.373, 0.5166, 0.4685, 0.3845, 0.5264, 0.4746, 0.3914, 0.5342]), + ("cuda", 8): np.array([0.5146, 0.4385, 0.3826, 0.5098, 0.4150, 0.3218, 0.5142, 0.4312, 0.3298, 0.5127, 0.4431, 0.3411, 0.5171, 0.4424, 0.3374, 0.5088, 0.4348, 0.3242, 0.5073, 0.4380, 0.3174, 0.5132, 0.4397, 0.3115, 0.5132, 0.4343, 0.3118, 0.5219, 0.4328, 0.3256]), + } + ) + # fmt: on + + expected_slice = expected_slices.get_expectation() + + max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten()) + + assert max_diff < 1e-4, f"Outputs are not close enough, got {max_diff}" diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..628930340294c7f54dd7813e1d921f0a65dc7ad2 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py @@ -0,0 +1,154 @@ +import random +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel + +from diffusers import 
( + AutoencoderKL, + FlowMatchEulerDiscreteScheduler, + SD3Transformer2DModel, + StableDiffusion3InpaintPipeline, +) + +from ...testing_utils import ( + enable_full_determinism, + floats_tensor, + torch_device, +) +from ..pipeline_params import ( + TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_INPAINTING_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import PipelineLatentTesterMixin, PipelineTesterMixin + + +enable_full_determinism() + + +class StableDiffusion3InpaintPipelineFastTests(PipelineLatentTesterMixin, unittest.TestCase, PipelineTesterMixin): + pipeline_class = StableDiffusion3InpaintPipeline + params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS + required_optional_params = PipelineTesterMixin.required_optional_params + batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS + image_params = frozenset( + [] + ) # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess + image_latents_params = frozenset([]) + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"mask", "masked_image_latents"}) + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = SD3Transformer2DModel( + sample_size=32, + patch_size=1, + in_channels=16, + num_layers=1, + attention_head_dim=8, + num_attention_heads=4, + joint_attention_dim=32, + caption_projection_dim=32, + pooled_projection_dim=64, + out_channels=16, + ) + clip_text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + hidden_act="gelu", + projection_dim=32, + ) + + torch.manual_seed(0) + text_encoder = CLIPTextModelWithProjection(clip_text_encoder_config) + + torch.manual_seed(0) + text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config) + + text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_3 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + vae = AutoencoderKL( + sample_size=32, + in_channels=3, + out_channels=3, + block_out_channels=(4,), + layers_per_block=1, + latent_channels=16, + norm_num_groups=1, + use_quant_conv=False, + use_post_quant_conv=False, + shift_factor=0.0609, + scaling_factor=1.5035, + ) + + scheduler = FlowMatchEulerDiscreteScheduler() + + return { + "scheduler": scheduler, + "text_encoder": text_encoder, + "text_encoder_2": text_encoder_2, + "text_encoder_3": text_encoder_3, + "tokenizer": tokenizer, + "tokenizer_2": tokenizer_2, + "tokenizer_3": tokenizer_3, + "transformer": transformer, + "vae": vae, + "image_encoder": None, + "feature_extractor": None, + } + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + mask_image = torch.ones((1, 1, 32, 32)).to(device) + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device="cpu").manual_seed(seed) + + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "mask_image": mask_image, + "height": 32, + "width": 32, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + "strength": 0.8, + } + return inputs + + def 
test_inference(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + + inputs = self.get_dummy_inputs(torch_device) + image = pipe(**inputs).images[0] + generated_slice = image.flatten() + generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]]) + + # fmt: off + expected_slice = np.array([0.5035, 0.6661, 0.5859, 0.413, 0.4224, 0.4234, 0.7181, 0.5062, 0.5183, 0.6877, 0.5074, 0.585, 0.6111, 0.5422, 0.5306, 0.5891]) + # fmt: on + + self.assertTrue( + np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice." + ) + + @unittest.skip("Skip for now.") + def test_multi_vae(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_adapter/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_adapter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..79b38d1cad1c54912dbdaab9c3acb1317a0ddf1b --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py @@ -0,0 +1,727 @@ +# coding=utf-8 +# Copyright 2022 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
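The adapter tests in this new file assemble `StableDiffusionAdapterPipeline` from tiny dummy components; for context, real-world usage pairs a pretrained `T2IAdapter` with a Stable Diffusion checkpoint along these lines (a sketch only — the adapter id and conditioning-image URL are illustrative placeholders, not taken from these tests):

import torch
from diffusers import StableDiffusionAdapterPipeline, T2IAdapter
from diffusers.utils import load_image

# attach a pretrained adapter to a Stable Diffusion checkpoint
adapter = T2IAdapter.from_pretrained("TencentARC/t2iadapter_canny_sd15v2", torch_dtype=torch.float16)
pipe = StableDiffusionAdapterPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", adapter=adapter, torch_dtype=torch.float16
).to("cuda")

# the adapter consumes a spatial conditioning image (here, canny edge maps)
control = load_image("https://example.com/canny_edges.png")
image = pipe("a photo of a room", image=control, num_inference_steps=25).images[0]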
+ +import gc +import random +import unittest + +import numpy as np +import torch +from parameterized import parameterized +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +import diffusers +from diffusers import ( + AutoencoderKL, + LCMScheduler, + MultiAdapter, + PNDMScheduler, + StableDiffusionAdapterPipeline, + T2IAdapter, + UNet2DConditionModel, +) +from diffusers.utils import logging +from diffusers.utils.import_utils import is_xformers_available + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + floats_tensor, + load_image, + load_numpy, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS +from ..test_pipelines_common import PipelineFromPipeTesterMixin, PipelineTesterMixin, assert_mean_pixel_difference + + +enable_full_determinism() + + +class AdapterTests: + pipeline_class = StableDiffusionAdapterPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + + def get_dummy_components(self, adapter_type, time_cond_proj_dim=None): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + time_cond_proj_dim=time_cond_proj_dim, + ) + scheduler = PNDMScheduler(skip_prk_steps=True) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + torch.manual_seed(0) + + if adapter_type == "full_adapter" or adapter_type == "light_adapter": + adapter = T2IAdapter( + in_channels=3, + channels=[32, 64], + num_res_blocks=2, + downscale_factor=2, + adapter_type=adapter_type, + ) + elif adapter_type == "multi_adapter": + adapter = MultiAdapter( + [ + T2IAdapter( + in_channels=3, + channels=[32, 64], + num_res_blocks=2, + downscale_factor=2, + adapter_type="full_adapter", + ), + T2IAdapter( + in_channels=3, + channels=[32, 64], + num_res_blocks=2, + downscale_factor=2, + adapter_type="full_adapter", + ), + ] + ) + else: + raise ValueError( + f"Unknown adapter type: {adapter_type}, must be one of 'full_adapter', 'light_adapter', or 'multi_adapter'" + ) + + components = { + "adapter": adapter, + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + } + return components + + def get_dummy_components_with_full_downscaling(self, adapter_type): + """Get dummy components with x8 VAE downscaling and 4 UNet down blocks. + These dummy components are intended to fully exercise the T2I-Adapter + downscaling behavior. 
+ """ + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 32, 32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"), + cross_attention_dim=32, + ) + scheduler = PNDMScheduler(skip_prk_steps=True) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 32, 32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + torch.manual_seed(0) + + if adapter_type == "full_adapter" or adapter_type == "light_adapter": + adapter = T2IAdapter( + in_channels=3, + channels=[32, 32, 32, 64], + num_res_blocks=2, + downscale_factor=8, + adapter_type=adapter_type, + ) + elif adapter_type == "multi_adapter": + adapter = MultiAdapter( + [ + T2IAdapter( + in_channels=3, + channels=[32, 32, 32, 64], + num_res_blocks=2, + downscale_factor=8, + adapter_type="full_adapter", + ), + T2IAdapter( + in_channels=3, + channels=[32, 32, 32, 64], + num_res_blocks=2, + downscale_factor=8, + adapter_type="full_adapter", + ), + ] + ) + else: + raise ValueError( + f"Unknown adapter type: {adapter_type}, must be one of 'full_adapter', 'light_adapter', or 'multi_adapter''" + ) + + components = { + "adapter": adapter, + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "safety_checker": None, + "feature_extractor": None, + } + return components + + def get_dummy_inputs(self, device, seed=0, height=64, width=64, num_images=1): + if num_images == 1: + image = floats_tensor((1, 3, height, width), rng=random.Random(seed)).to(device) + else: + image = [ + floats_tensor((1, 3, height, width), rng=random.Random(seed)).to(device) for _ in range(num_images) + ] + + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + return inputs + + def test_attention_slicing_forward_pass(self): + return self._test_attention_slicing_forward_pass(expected_max_diff=2e-3) + + @unittest.skipIf( + torch_device != "cuda" or not is_xformers_available(), + reason="XFormers attention is only available with CUDA and `xformers` installed", + ) + def test_xformers_attention_forwardGenerator_pass(self): + self._test_xformers_attention_forwardGenerator_pass(expected_max_diff=2e-3) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=2e-3) + + @parameterized.expand( + [ + # (dim=264) The internal feature map will be 33x33 after initial pixel unshuffling (downscaled x8). 
+ (((4 * 8 + 1) * 8),), + # (dim=272) The internal feature map will be 17x17 after the first T2I down block (downscaled x16). + (((4 * 4 + 1) * 16),), + # (dim=288) The internal feature map will be 9x9 after the second T2I down block (downscaled x32). + (((4 * 2 + 1) * 32),), + # (dim=320) The internal feature map will be 5x5 after the third T2I down block (downscaled x64). + (((4 * 1 + 1) * 64),), + ] + ) + def test_multiple_image_dimensions(self, dim): + """Test that the T2I-Adapter pipeline supports any input dimension that + is divisible by the adapter's `downscale_factor`. This test was added in + response to an issue where the T2I Adapter's downscaling padding + behavior did not match the UNet's behavior. + + Note that we have selected `dim` values to produce odd resolutions at + each downscaling level. + """ + components = self.get_dummy_components_with_full_downscaling() + sd_pipe = StableDiffusionAdapterPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device, height=dim, width=dim) + image = sd_pipe(**inputs).images + + assert image.shape == (1, dim, dim, 3) + + def test_adapter_lcm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionAdapterPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4535, 0.5493, 0.4359, 0.5452, 0.6086, 0.4441, 0.5544, 0.501, 0.4859]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_adapter_lcm_custom_timesteps(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionAdapterPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["num_inference_steps"] + inputs["timesteps"] = [999, 499] + output = sd_pipe(**inputs) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4535, 0.5493, 0.4359, 0.5452, 0.6086, 0.4441, 0.5544, 0.501, 0.4859]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + + +class StableDiffusionFullAdapterPipelineFastTests( + AdapterTests, PipelineTesterMixin, PipelineFromPipeTesterMixin, unittest.TestCase +): + def get_dummy_components(self, time_cond_proj_dim=None): + return super().get_dummy_components("full_adapter", time_cond_proj_dim=time_cond_proj_dim) + + def get_dummy_components_with_full_downscaling(self): + return super().get_dummy_components_with_full_downscaling("full_adapter") + + def 
test_stable_diffusion_adapter_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionAdapterPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4858, 0.5500, 0.4278, 0.4669, 0.6184, 0.4322, 0.5010, 0.5033, 0.4746]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3 + + def test_from_pipe_consistent_forward_pass_cpu_offload(self): + super().test_from_pipe_consistent_forward_pass_cpu_offload(expected_max_diff=6e-3) + + +class StableDiffusionLightAdapterPipelineFastTests(AdapterTests, PipelineTesterMixin, unittest.TestCase): + def get_dummy_components(self, time_cond_proj_dim=None): + return super().get_dummy_components("light_adapter", time_cond_proj_dim=time_cond_proj_dim) + + def get_dummy_components_with_full_downscaling(self): + return super().get_dummy_components_with_full_downscaling("light_adapter") + + def test_stable_diffusion_adapter_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionAdapterPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4965, 0.5548, 0.4330, 0.4771, 0.6226, 0.4382, 0.5037, 0.5071, 0.4782]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3 + + +class StableDiffusionMultiAdapterPipelineFastTests(AdapterTests, PipelineTesterMixin, unittest.TestCase): + supports_dduf = False + + def get_dummy_components(self, time_cond_proj_dim=None): + return super().get_dummy_components("multi_adapter", time_cond_proj_dim=time_cond_proj_dim) + + def get_dummy_components_with_full_downscaling(self): + return super().get_dummy_components_with_full_downscaling("multi_adapter") + + def get_dummy_inputs(self, device, height=64, width=64, seed=0): + inputs = super().get_dummy_inputs(device, seed, height=height, width=width, num_images=2) + inputs["adapter_conditioning_scale"] = [0.5, 0.5] + return inputs + + def test_stable_diffusion_adapter_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionAdapterPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4902, 0.5539, 0.4317, 0.4682, 0.6190, 0.4351, 0.5018, 0.5046, 0.4772]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3 + + def test_inference_batch_consistent( + self, batch_sizes=[2, 4, 13], additional_params_copy_to_batched_inputs=["num_inference_steps"] + ): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + logger = logging.get_logger(pipe.__module__) + 
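# Raise the log level to FATAL while batching: the deliberately unequal batch sizes built + # below can emit warnings that are irrelevant to this test (the level is restored afterwards). +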
logger.setLevel(level=diffusers.logging.FATAL) + + # batchify inputs + for batch_size in batch_sizes: + batched_inputs = {} + for name, value in inputs.items(): + if name in self.batch_params: + # prompt is string + if name == "prompt": + len_prompt = len(value) + # make unequal batch sizes + batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)] + + # make last batch super long + batched_inputs[name][-1] = 100 * "very long" + elif name == "image": + batched_images = [] + + for image in value: + batched_images.append(batch_size * [image]) + + batched_inputs[name] = batched_images + else: + batched_inputs[name] = batch_size * [value] + + elif name == "batch_size": + batched_inputs[name] = batch_size + else: + batched_inputs[name] = value + + for arg in additional_params_copy_to_batched_inputs: + batched_inputs[arg] = inputs[arg] + + batched_inputs["output_type"] = "np" + + if self.pipeline_class.__name__ == "DanceDiffusionPipeline": + batched_inputs.pop("output_type") + + output = pipe(**batched_inputs) + + assert len(output[0]) == batch_size + + batched_inputs["output_type"] = "np" + + if self.pipeline_class.__name__ == "DanceDiffusionPipeline": + batched_inputs.pop("output_type") + + output = pipe(**batched_inputs)[0] + + assert output.shape[0] == batch_size + + logger.setLevel(level=diffusers.logging.WARNING) + + def test_num_images_per_prompt(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + batch_sizes = [1, 2] + num_images_per_prompts = [1, 2] + + for batch_size in batch_sizes: + for num_images_per_prompt in num_images_per_prompts: + inputs = self.get_dummy_inputs(torch_device) + + for key in inputs.keys(): + if key in self.batch_params: + if key == "image": + batched_images = [] + + for image in inputs[key]: + batched_images.append(batch_size * [image]) + + inputs[key] = batched_images + else: + inputs[key] = batch_size * [inputs[key]] + + images = pipe(**inputs, num_images_per_prompt=num_images_per_prompt)[0] + + assert images.shape[0] == batch_size * num_images_per_prompt + + def test_inference_batch_single_identical( + self, + batch_size=3, + test_max_difference=None, + test_mean_pixel_difference=None, + relax_max_difference=False, + expected_max_diff=2e-3, + additional_params_copy_to_batched_inputs=["num_inference_steps"], + ): + if test_max_difference is None: + # TODO(Pedro) - not sure why, but not at all reproducible at the moment it seems + # make sure that batched and non-batched is identical + test_max_difference = torch_device != "mps" + + if test_mean_pixel_difference is None: + # TODO same as above + test_mean_pixel_difference = torch_device != "mps" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + logger = logging.get_logger(pipe.__module__) + logger.setLevel(level=diffusers.logging.FATAL) + + # batchify inputs + batched_inputs = {} + for name, value in inputs.items(): + if name in self.batch_params: + # prompt is string + if name == "prompt": + len_prompt = len(value) + # make unequal batch sizes + batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)] + + # make last batch super long + batched_inputs[name][-1] = 100 * "very long" + elif name == "image": + batched_images = [] + + for image in value: + batched_images.append(batch_size 
* [image]) + + batched_inputs[name] = batched_images + else: + batched_inputs[name] = batch_size * [value] + elif name == "batch_size": + batched_inputs[name] = batch_size + elif name == "generator": + batched_inputs[name] = [self.get_generator(i) for i in range(batch_size)] + else: + batched_inputs[name] = value + + for arg in additional_params_copy_to_batched_inputs: + batched_inputs[arg] = inputs[arg] + + if self.pipeline_class.__name__ != "DanceDiffusionPipeline": + batched_inputs["output_type"] = "np" + + output_batch = pipe(**batched_inputs) + assert output_batch[0].shape[0] == batch_size + + inputs["generator"] = self.get_generator(0) + + output = pipe(**inputs) + + logger.setLevel(level=diffusers.logging.WARNING) + if test_max_difference: + if relax_max_difference: + # Taking the median of the largest differences + # is resilient to outliers + diff = np.abs(output_batch[0][0] - output[0][0]) + diff = diff.flatten() + diff.sort() + max_diff = np.median(diff[-5:]) + else: + max_diff = np.abs(output_batch[0][0] - output[0][0]).max() + assert max_diff < expected_max_diff + + if test_mean_pixel_difference: + assert_mean_pixel_difference(output_batch[0][0], output[0][0]) + + +@slow +@require_torch_accelerator +class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_diffusion_adapter_depth_sd_v15(self): + adapter_model = "TencentARC/t2iadapter_depth_sd15v2" + sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5" + prompt = "desk" + image_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/desk_depth.png" + input_channels = 3 + out_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/t2iadapter_depth_sd15v2.npy" + + image = load_image(image_url) + expected_out = load_numpy(out_url) + if input_channels == 1: + image = image.convert("L") + + adapter = T2IAdapter.from_pretrained(adapter_model, torch_dtype=torch.float16) + + pipe = StableDiffusionAdapterPipeline.from_pretrained(sd_model, adapter=adapter, safety_checker=None) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + generator = torch.Generator(device="cpu").manual_seed(0) + out = pipe(prompt=prompt, image=image, generator=generator, num_inference_steps=2, output_type="np").images + + max_diff = numpy_cosine_similarity_distance(out.flatten(), expected_out.flatten()) + assert max_diff < 1e-2 + + def test_stable_diffusion_adapter_zoedepth_sd_v15(self): + adapter_model = "TencentARC/t2iadapter_zoedepth_sd15v1" + sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5" + prompt = "motorcycle" + image_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/motorcycle.png" + input_channels = 3 + out_url = "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main/stable_diffusion_adapter/sd_adapter_v15_zoe_depth.npy" + + image = load_image(image_url) + expected_out = load_numpy(out_url) + if input_channels == 1: + image = image.convert("L") + + adapter = T2IAdapter.from_pretrained(adapter_model, torch_dtype=torch.float16) + + pipe = StableDiffusionAdapterPipeline.from_pretrained(sd_model, 
adapter=adapter, safety_checker=None) + pipe.set_progress_bar_config(disable=None) + pipe.enable_model_cpu_offload() + generator = torch.Generator(device="cpu").manual_seed(0) + out = pipe(prompt=prompt, image=image, generator=generator, num_inference_steps=2, output_type="np").images + + max_diff = numpy_cosine_similarity_distance(out.flatten(), expected_out.flatten()) + assert max_diff < 1e-2 + + def test_stable_diffusion_adapter_canny_sd_v15(self): + adapter_model = "TencentARC/t2iadapter_canny_sd15v2" + sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5" + prompt = "toy" + image_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png" + input_channels = 1 + out_url = "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main/stable_diffusion_adapter/sd_adapter_v15_zoe_depth.npy" + + image = load_image(image_url) + expected_out = load_numpy(out_url) + if input_channels == 1: + image = image.convert("L") + + adapter = T2IAdapter.from_pretrained(adapter_model, torch_dtype=torch.float16) + + pipe = StableDiffusionAdapterPipeline.from_pretrained(sd_model, adapter=adapter, safety_checker=None) + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + generator = torch.Generator(device="cpu").manual_seed(0) + + out = pipe(prompt=prompt, image=image, generator=generator, num_inference_steps=2, output_type="np").images + + max_diff = numpy_cosine_similarity_distance(out.flatten(), expected_out.flatten()) + assert max_diff < 1e-2 + + def test_stable_diffusion_adapter_sketch_sd15(self): + adapter_model = "TencentARC/t2iadapter_sketch_sd15v2" + sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5" + prompt = "cat" + image_url = ( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/edge.png" + ) + input_channels = 1 + out_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/t2iadapter_sketch_sd15v2.npy" + + image = load_image(image_url) + expected_out = load_numpy(out_url) + if input_channels == 1: + image = image.convert("L") + + adapter = T2IAdapter.from_pretrained(adapter_model, torch_dtype=torch.float16) + + pipe = StableDiffusionAdapterPipeline.from_pretrained(sd_model, adapter=adapter, safety_checker=None) + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + generator = torch.Generator(device="cpu").manual_seed(0) + + out = pipe(prompt=prompt, image=image, generator=generator, num_inference_steps=2, output_type="np").images + + max_diff = numpy_cosine_similarity_distance(out.flatten(), expected_out.flatten()) + assert max_diff < 1e-2 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_image_variation/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_image_variation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_image_variation/test_stable_diffusion_image_variation.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_image_variation/test_stable_diffusion_image_variation.py new file mode 100644 index 0000000000000000000000000000000000000000..dbf5a7b68eae3a593cce010f9ef742ff89cc1b90 --- /dev/null +++ 
b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_image_variation/test_stable_diffusion_image_variation.py @@ -0,0 +1,345 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import random +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import CLIPImageProcessor, CLIPVisionConfig, CLIPVisionModelWithProjection + +from diffusers import ( + AutoencoderKL, + DPMSolverMultistepScheduler, + PNDMScheduler, + StableDiffusionImageVariationPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + floats_tensor, + load_image, + load_numpy, + nightly, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import IMAGE_VARIATION_BATCH_PARAMS, IMAGE_VARIATION_PARAMS +from ..test_pipelines_common import PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin + + +enable_full_determinism() + + +class StableDiffusionImageVariationPipelineFastTests( + PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableDiffusionImageVariationPipeline + params = IMAGE_VARIATION_PARAMS + batch_params = IMAGE_VARIATION_BATCH_PARAMS + image_params = frozenset([]) + # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess + image_latents_params = frozenset([]) + + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + ) + scheduler = PNDMScheduler(skip_prk_steps=True) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + torch.manual_seed(0) + image_encoder_config = CLIPVisionConfig( + hidden_size=32, + projection_dim=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + image_size=32, + patch_size=4, + ) + image_encoder = CLIPVisionModelWithProjection(image_encoder_config) + feature_extractor = CLIPImageProcessor(crop_size=32, size=32) + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "image_encoder": image_encoder, + "feature_extractor": feature_extractor, + "safety_checker": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)) + image = image.cpu().permute(0, 2, 3, 1)[0] + 
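# The tensor was permuted from CHW to HWC above; np.uint8 casts it so PIL can wrap it + # (the dummy input only needs to be deterministic, not visually meaningful). +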
image = Image.fromarray(np.uint8(image)).convert("RGB").resize((32, 32)) + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_img_variation_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionImageVariationPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5348, 0.5924, 0.4798, 0.5237, 0.5741, 0.4651, 0.5344, 0.4942, 0.4851]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_stable_diffusion_img_variation_multiple_images(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionImageVariationPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["image"] = 2 * [inputs["image"]] + output = sd_pipe(**inputs) + + image = output.images + + image_slice = image[-1, -3:, -3:, -1] + + assert image.shape == (2, 64, 64, 3) + expected_slice = np.array([0.6647, 0.5557, 0.5723, 0.5567, 0.5869, 0.6044, 0.5502, 0.5439, 0.5189]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + +@slow +@require_torch_accelerator +class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_imgvar/input_image_vermeer.png" + ) + latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64)) + latents = torch.from_numpy(latents).to(device=device, dtype=dtype) + inputs = { + "image": init_image, + "latents": latents, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_img_variation_pipeline_default(self): + sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained( + "lambdalabs/sd-image-variations-diffusers", safety_checker=None + ) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_inputs(generator_device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.5348, 0.5924, 0.4798, 0.5237, 0.5741, 0.4651, 0.5344, 0.4942, 0.4851]) + + max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice) + assert max_diff < 1e-4 + + def 
test_stable_diffusion_img_variation_intermediate_state(self): + number_of_steps = 0 + + def callback_fn(step: int, timestep: int, latents: torch.Tensor) -> None: + callback_fn.has_been_called = True + nonlocal number_of_steps + number_of_steps += 1 + if step == 1: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([0.5348, 0.5924, 0.4798, 0.5237, 0.5741, 0.4651, 0.5344, 0.4942, 0.4851]) + max_diff = numpy_cosine_similarity_distance(latents_slice.flatten(), expected_slice) + + assert max_diff < 1e-3 + + elif step == 2: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([0.5348, 0.5924, 0.4798, 0.5237, 0.5741, 0.4651, 0.5344, 0.4942, 0.4851]) + max_diff = numpy_cosine_similarity_distance(latents_slice.flatten(), expected_slice) + + assert max_diff < 1e-3 + + callback_fn.has_been_called = False + + pipe = StableDiffusionImageVariationPipeline.from_pretrained( + "lambdalabs/sd-image-variations-diffusers", + safety_checker=None, + torch_dtype=torch.float16, + ) + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + generator_device = "cpu" + inputs = self.get_inputs(generator_device, dtype=torch.float16) + pipe(**inputs, callback=callback_fn, callback_steps=1) + assert callback_fn.has_been_called + assert number_of_steps == inputs["num_inference_steps"] + + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipe = StableDiffusionImageVariationPipeline.from_pretrained( + "lambdalabs/sd-image-variations-diffusers", safety_checker=None, torch_dtype=torch.float16 + ) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + pipe.enable_sequential_cpu_offload(device=torch_device) + + inputs = self.get_inputs(torch_device, dtype=torch.float16) + _ = pipe(**inputs) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 2.6 GB is allocated + assert mem_bytes < 2.6 * 10**9 + + +@nightly +@require_torch_accelerator +class StableDiffusionImageVariationPipelineNightlyTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_imgvar/input_image_vermeer.png" + ) + latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64)) + latents = torch.from_numpy(latents).to(device=device, dtype=dtype) + inputs = { + "image": init_image, + "latents": latents, + "generator": generator, + "num_inference_steps": 50, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_img_variation_pndm(self): + sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained("fusing/sd-image-variations-diffusers") + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = 
self.get_inputs(torch_device) + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_imgvar/lambdalabs_variations_pndm.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 + + def test_img_variation_dpm(self): + sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained("fusing/sd-image-variations-diffusers") + sd_pipe.scheduler = DPMSolverMultistepScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + inputs["num_inference_steps"] = 25 + image = sd_pipe(**inputs).images[0] + + expected_image = load_numpy( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_imgvar/lambdalabs_variations_dpm_multi.npy" + ) + max_diff = np.abs(expected_image - image).max() + assert max_diff < 1e-3 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py new file mode 100644 index 0000000000000000000000000000000000000000..b318a505e9db90e5c5d875cbdf6155fc087fab72 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py @@ -0,0 +1,976 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
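+ +# These tests cover StableDiffusionXLPipeline, including the "mixture of denoisers" workflow in +# which a base pipeline denoises until `denoising_end` and StableDiffusionXLImg2ImgPipeline +# resumes from `denoising_start` on the latent output. Illustrative sketch only (dummy components +# as built below; the 0.7 split is arbitrary): +# +# base = StableDiffusionXLPipeline(**components) +# refiner = StableDiffusionXLImg2ImgPipeline(**components) +# latents = base(prompt, denoising_end=0.7, output_type="latent").images +# image = refiner(prompt, image=latents, denoising_start=0.7).images[0]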
+ +import copy +import gc +import tempfile +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + LCMScheduler, + StableDiffusionXLImg2ImgPipeline, + StableDiffusionXLPipeline, + UNet2DConditionModel, + UniPCMultistepScheduler, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + load_image, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + TEXT_TO_IMAGE_BATCH_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, + TEXT_TO_IMAGE_IMAGE_PARAMS, + TEXT_TO_IMAGE_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, + SDFunctionTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionXLPipelineFastTests( + SDFunctionTesterMixin, + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionXLPipeline + params = TEXT_TO_IMAGE_PARAMS + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union({"add_text_embeds", "add_time_ids"}) + test_layerwise_casting = True + test_group_offloading = True + + def get_dummy_components(self, time_cond_proj_dim=None): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(2, 4), + layers_per_block=2, + time_cond_proj_dim=time_cond_proj_dim, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + projection_class_embeddings_input_dim=80, # 6 * 8 + 32 + cross_attention_dim=64, + norm_num_groups=1, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=32, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + "image_encoder": None, + "feature_extractor": None, + } + return 
components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + } + return inputs + + def test_stable_diffusion_xl_euler(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5388, 0.5452, 0.4694, 0.4583, 0.5253, 0.4832, 0.5288, 0.5035, 0.47]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_xl_euler_lcm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionXLPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4917, 0.6555, 0.4348, 0.5219, 0.7324, 0.4855, 0.5168, 0.5447, 0.5156]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_xl_euler_lcm_custom_timesteps(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionXLPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["num_inference_steps"] + inputs["timesteps"] = [999, 499] + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.4917, 0.6555, 0.4348, 0.5219, 0.7324, 0.4855, 0.5168, 0.5447, 0.5156]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_ays(self): + from diffusers.schedulers import AysSchedules + + timestep_schedule = AysSchedules["StableDiffusionXLTimesteps"] + sigma_schedule = AysSchedules["StableDiffusionXLSigmas"] + + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionXLPipeline(**components) + sd_pipe.scheduler = EulerDiscreteScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["num_inference_steps"] = 10 + output = sd_pipe(**inputs).images + + inputs = self.get_dummy_inputs(device) + inputs["num_inference_steps"] = None + inputs["timesteps"] = timestep_schedule + output_ts = sd_pipe(**inputs).images + + inputs = self.get_dummy_inputs(device) + inputs["num_inference_steps"] = None + inputs["sigmas"] = sigma_schedule + 
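# Passing the AYS schedule as explicit sigmas should resolve to the same timesteps as + # passing it as explicit timesteps, so the two outputs are expected to match below. +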
output_sigmas = sd_pipe(**inputs).images + + assert np.abs(output_sigmas.flatten() - output_ts.flatten()).max() < 1e-3, ( + "ays timesteps and ays sigmas should have the same outputs" + ) + assert np.abs(output.flatten() - output_ts.flatten()).max() > 1e-3, ( + "use ays timesteps should have different outputs" + ) + assert np.abs(output.flatten() - output_sigmas.flatten()).max() > 1e-3, ( + "use ays sigmas should have different outputs" + ) + + def test_ip_adapter(self): + expected_pipe_slice = None + if torch_device == "cpu": + expected_pipe_slice = np.array([0.5388, 0.5452, 0.4694, 0.4583, 0.5253, 0.4832, 0.5288, 0.5035, 0.4766]) + + return super().test_ip_adapter(expected_pipe_slice=expected_pipe_slice) + + def test_attention_slicing_forward_pass(self): + super().test_attention_slicing_forward_pass(expected_max_diff=3e-3) + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + @require_torch_accelerator + def test_stable_diffusion_xl_offloads(self): + pipes = [] + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**components).to(torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**components) + sd_pipe.enable_model_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**components) + sd_pipe.enable_sequential_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + image_slices = [] + for pipe in pipes: + pipe.unet.set_default_attn_processor() + + inputs = self.get_dummy_inputs(torch_device) + image = pipe(**inputs).images + + image_slices.append(image[0, -3:, -3:, -1].flatten()) + + assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3 + + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + + def test_stable_diffusion_two_xl_mixture_of_denoiser_fast(self): + components = self.get_dummy_components() + pipe_1 = StableDiffusionXLPipeline(**components).to(torch_device) + pipe_1.unet.set_default_attn_processor() + pipe_2 = StableDiffusionXLImg2ImgPipeline(**components).to(torch_device) + pipe_2.unet.set_default_attn_processor() + + def assert_run_mixture( + num_steps, + split, + scheduler_cls_orig, + expected_tss, + num_train_timesteps=pipe_1.scheduler.config.num_train_timesteps, + ): + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = num_steps + + class scheduler_cls(scheduler_cls_orig): + pass + + pipe_1.scheduler = scheduler_cls.from_config(pipe_1.scheduler.config) + pipe_2.scheduler = scheduler_cls.from_config(pipe_2.scheduler.config) + + # Let's retrieve the number of timesteps we want to use + pipe_1.scheduler.set_timesteps(num_steps) + expected_steps = pipe_1.scheduler.timesteps.tolist() + + if pipe_1.scheduler.order == 2: + expected_steps_1 = list(filter(lambda ts: ts >= split, expected_tss)) + expected_steps_2 = expected_steps_1[-1:] + list(filter(lambda ts: ts < split, expected_tss)) + expected_steps = expected_steps_1 + expected_steps_2 + else: + expected_steps_1 = list(filter(lambda ts: ts >= split, expected_tss)) + expected_steps_2 = list(filter(lambda ts: ts < split, expected_tss)) + + # now we monkey patch step `done_steps` + # list into the step function for testing + done_steps = [] + old_step = copy.copy(scheduler_cls.step) + + def new_step(self, 
*args, **kwargs): + done_steps.append(args[1].cpu().item()) # args[1] is always the passed `t` + return old_step(self, *args, **kwargs) + + scheduler_cls.step = new_step + + inputs_1 = { + **inputs, + **{ + "denoising_end": 1.0 - (split / num_train_timesteps), + "output_type": "latent", + }, + } + latents = pipe_1(**inputs_1).images[0] + + assert expected_steps_1 == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" + + inputs_2 = { + **inputs, + **{ + "denoising_start": 1.0 - (split / num_train_timesteps), + "image": latents, + }, + } + pipe_2(**inputs_2).images[0] + + assert expected_steps_2 == done_steps[len(expected_steps_1) :] + assert expected_steps == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" + + steps = 10 + for split in [300, 700]: + for scheduler_cls_timesteps in [ + (EulerDiscreteScheduler, [901, 801, 701, 601, 501, 401, 301, 201, 101, 1]), + ( + HeunDiscreteScheduler, + [ + 901.0, + 801.0, + 801.0, + 701.0, + 701.0, + 601.0, + 601.0, + 501.0, + 501.0, + 401.0, + 401.0, + 301.0, + 301.0, + 201.0, + 201.0, + 101.0, + 101.0, + 1.0, + 1.0, + ], + ), + ]: + assert_run_mixture(steps, split, scheduler_cls_timesteps[0], scheduler_cls_timesteps[1]) + + @slow + def test_stable_diffusion_two_xl_mixture_of_denoiser(self): + components = self.get_dummy_components() + pipe_1 = StableDiffusionXLPipeline(**components).to(torch_device) + pipe_1.unet.set_default_attn_processor() + pipe_2 = StableDiffusionXLImg2ImgPipeline(**components).to(torch_device) + pipe_2.unet.set_default_attn_processor() + + def assert_run_mixture( + num_steps, + split, + scheduler_cls_orig, + expected_tss, + num_train_timesteps=pipe_1.scheduler.config.num_train_timesteps, + ): + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = num_steps + + class scheduler_cls(scheduler_cls_orig): + pass + + pipe_1.scheduler = scheduler_cls.from_config(pipe_1.scheduler.config) + pipe_2.scheduler = scheduler_cls.from_config(pipe_2.scheduler.config) + + # Let's retrieve the number of timesteps we want to use + pipe_1.scheduler.set_timesteps(num_steps) + expected_steps = pipe_1.scheduler.timesteps.tolist() + + if pipe_1.scheduler.order == 2: + expected_steps_1 = list(filter(lambda ts: ts >= split, expected_tss)) + expected_steps_2 = expected_steps_1[-1:] + list(filter(lambda ts: ts < split, expected_tss)) + expected_steps = expected_steps_1 + expected_steps_2 + else: + expected_steps_1 = list(filter(lambda ts: ts >= split, expected_tss)) + expected_steps_2 = list(filter(lambda ts: ts < split, expected_tss)) + + # now we monkey patch step `done_steps` + # list into the step function for testing + done_steps = [] + old_step = copy.copy(scheduler_cls.step) + + def new_step(self, *args, **kwargs): + done_steps.append(args[1].cpu().item()) # args[1] is always the passed `t` + return old_step(self, *args, **kwargs) + + scheduler_cls.step = new_step + + inputs_1 = { + **inputs, + **{ + "denoising_end": 1.0 - (split / num_train_timesteps), + "output_type": "latent", + }, + } + latents = pipe_1(**inputs_1).images[0] + + assert expected_steps_1 == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" + + inputs_2 = { + **inputs, + **{ + "denoising_start": 1.0 - (split / num_train_timesteps), + "image": latents, + }, + } + pipe_2(**inputs_2).images[0] + + assert expected_steps_2 == done_steps[len(expected_steps_1) :] + assert expected_steps == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" + + 
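# Sweep several split points and schedulers. Heun is a second-order scheduler, so every + # interior timestep appears twice in its expected timestep list. +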
steps = 10 + for split in [300, 500, 700]: + for scheduler_cls_timesteps in [ + (DDIMScheduler, [901, 801, 701, 601, 501, 401, 301, 201, 101, 1]), + (EulerDiscreteScheduler, [901, 801, 701, 601, 501, 401, 301, 201, 101, 1]), + (DPMSolverMultistepScheduler, [901, 811, 721, 631, 541, 451, 361, 271, 181, 91]), + (UniPCMultistepScheduler, [901, 811, 721, 631, 541, 451, 361, 271, 181, 91]), + ( + HeunDiscreteScheduler, + [ + 901.0, + 801.0, + 801.0, + 701.0, + 701.0, + 601.0, + 601.0, + 501.0, + 501.0, + 401.0, + 401.0, + 301.0, + 301.0, + 201.0, + 201.0, + 101.0, + 101.0, + 1.0, + 1.0, + ], + ), + ]: + assert_run_mixture(steps, split, scheduler_cls_timesteps[0], scheduler_cls_timesteps[1]) + + steps = 25 + for split in [300, 500, 700]: + for scheduler_cls_timesteps in [ + ( + DDIMScheduler, + [ + 961, + 921, + 881, + 841, + 801, + 761, + 721, + 681, + 641, + 601, + 561, + 521, + 481, + 441, + 401, + 361, + 321, + 281, + 241, + 201, + 161, + 121, + 81, + 41, + 1, + ], + ), + ( + EulerDiscreteScheduler, + [ + 961.0, + 921.0, + 881.0, + 841.0, + 801.0, + 761.0, + 721.0, + 681.0, + 641.0, + 601.0, + 561.0, + 521.0, + 481.0, + 441.0, + 401.0, + 361.0, + 321.0, + 281.0, + 241.0, + 201.0, + 161.0, + 121.0, + 81.0, + 41.0, + 1.0, + ], + ), + ( + DPMSolverMultistepScheduler, + [ + 951, + 913, + 875, + 837, + 799, + 761, + 723, + 685, + 647, + 609, + 571, + 533, + 495, + 457, + 419, + 381, + 343, + 305, + 267, + 229, + 191, + 153, + 115, + 77, + 39, + ], + ), + ( + UniPCMultistepScheduler, + [ + 951, + 913, + 875, + 837, + 799, + 761, + 723, + 685, + 647, + 609, + 571, + 533, + 495, + 457, + 419, + 381, + 343, + 305, + 267, + 229, + 191, + 153, + 115, + 77, + 39, + ], + ), + ( + HeunDiscreteScheduler, + [ + 961.0, + 921.0, + 921.0, + 881.0, + 881.0, + 841.0, + 841.0, + 801.0, + 801.0, + 761.0, + 761.0, + 721.0, + 721.0, + 681.0, + 681.0, + 641.0, + 641.0, + 601.0, + 601.0, + 561.0, + 561.0, + 521.0, + 521.0, + 481.0, + 481.0, + 441.0, + 441.0, + 401.0, + 401.0, + 361.0, + 361.0, + 321.0, + 321.0, + 281.0, + 281.0, + 241.0, + 241.0, + 201.0, + 201.0, + 161.0, + 161.0, + 121.0, + 121.0, + 81.0, + 81.0, + 41.0, + 41.0, + 1.0, + 1.0, + ], + ), + ]: + assert_run_mixture(steps, split, scheduler_cls_timesteps[0], scheduler_cls_timesteps[1]) + + @slow + def test_stable_diffusion_three_xl_mixture_of_denoiser(self): + components = self.get_dummy_components() + pipe_1 = StableDiffusionXLPipeline(**components).to(torch_device) + pipe_1.unet.set_default_attn_processor() + pipe_2 = StableDiffusionXLImg2ImgPipeline(**components).to(torch_device) + pipe_2.unet.set_default_attn_processor() + pipe_3 = StableDiffusionXLImg2ImgPipeline(**components).to(torch_device) + pipe_3.unet.set_default_attn_processor() + + def assert_run_mixture( + num_steps, + split_1, + split_2, + scheduler_cls_orig, + num_train_timesteps=pipe_1.scheduler.config.num_train_timesteps, + ): + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = num_steps + + class scheduler_cls(scheduler_cls_orig): + pass + + pipe_1.scheduler = scheduler_cls.from_config(pipe_1.scheduler.config) + pipe_2.scheduler = scheduler_cls.from_config(pipe_2.scheduler.config) + pipe_3.scheduler = scheduler_cls.from_config(pipe_3.scheduler.config) + + # Let's retrieve the number of timesteps we want to use + pipe_1.scheduler.set_timesteps(num_steps) + expected_steps = pipe_1.scheduler.timesteps.tolist() + + split_1_ts = num_train_timesteps - int(round(num_train_timesteps * split_1)) + split_2_ts = num_train_timesteps - int(round(num_train_timesteps * 
split_2)) + + if pipe_1.scheduler.order == 2: + expected_steps_1 = list(filter(lambda ts: ts >= split_1_ts, expected_steps)) + expected_steps_2 = expected_steps_1[-1:] + list( + filter(lambda ts: ts >= split_2_ts and ts < split_1_ts, expected_steps) + ) + expected_steps_3 = expected_steps_2[-1:] + list(filter(lambda ts: ts < split_2_ts, expected_steps)) + expected_steps = expected_steps_1 + expected_steps_2 + expected_steps_3 + else: + expected_steps_1 = list(filter(lambda ts: ts >= split_1_ts, expected_steps)) + expected_steps_2 = list(filter(lambda ts: ts >= split_2_ts and ts < split_1_ts, expected_steps)) + expected_steps_3 = list(filter(lambda ts: ts < split_2_ts, expected_steps)) + + # now we monkey patch step `done_steps` + # list into the step function for testing + done_steps = [] + old_step = copy.copy(scheduler_cls.step) + + def new_step(self, *args, **kwargs): + done_steps.append(args[1].cpu().item()) # args[1] is always the passed `t` + return old_step(self, *args, **kwargs) + + scheduler_cls.step = new_step + + inputs_1 = {**inputs, **{"denoising_end": split_1, "output_type": "latent"}} + latents = pipe_1(**inputs_1).images[0] + + assert expected_steps_1 == done_steps, ( + f"Failure with {scheduler_cls.__name__} and {num_steps} and {split_1} and {split_2}" + ) + + with self.assertRaises(ValueError) as cm: + inputs_2 = { + **inputs, + **{ + "denoising_start": split_2, + "denoising_end": split_1, + "image": latents, + "output_type": "latent", + }, + } + pipe_2(**inputs_2).images[0] + assert "cannot be larger than or equal to `denoising_end`" in str(cm.exception) + + inputs_2 = { + **inputs, + **{"denoising_start": split_1, "denoising_end": split_2, "image": latents, "output_type": "latent"}, + } + pipe_2(**inputs_2).images[0] + + assert expected_steps_2 == done_steps[len(expected_steps_1) :] + + inputs_3 = {**inputs, **{"denoising_start": split_2, "image": latents}} + pipe_3(**inputs_3).images[0] + + assert expected_steps_3 == done_steps[len(expected_steps_1) + len(expected_steps_2) :] + assert expected_steps == done_steps, ( + f"Failure with {scheduler_cls.__name__} and {num_steps} and {split_1} and {split_2}" + ) + + for steps in [7, 11, 20]: + for split_1, split_2 in zip([0.19, 0.32], [0.81, 0.68]): + for scheduler_cls in [ + DDIMScheduler, + EulerDiscreteScheduler, + DPMSolverMultistepScheduler, + UniPCMultistepScheduler, + HeunDiscreteScheduler, + ]: + assert_run_mixture(steps, split_1, split_2, scheduler_cls) + + def test_stable_diffusion_xl_multi_prompts(self): + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components).to(torch_device) + + # forward with single prompt + inputs = self.get_dummy_inputs(torch_device) + output = sd_pipe(**inputs) + image_slice_1 = output.images[0, -3:, -3:, -1] + + # forward with same prompt duplicated + inputs = self.get_dummy_inputs(torch_device) + inputs["prompt_2"] = inputs["prompt"] + output = sd_pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + # ensure the results are equal + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + # forward with different prompt + inputs = self.get_dummy_inputs(torch_device) + inputs["prompt_2"] = "different prompt" + output = sd_pipe(**inputs) + image_slice_3 = output.images[0, -3:, -3:, -1] + + # ensure the results are not equal + assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4 + + # manually set a negative_prompt + inputs = self.get_dummy_inputs(torch_device) + inputs["negative_prompt"] = 
"negative prompt" + output = sd_pipe(**inputs) + image_slice_1 = output.images[0, -3:, -3:, -1] + + # forward with same negative_prompt duplicated + inputs = self.get_dummy_inputs(torch_device) + inputs["negative_prompt"] = "negative prompt" + inputs["negative_prompt_2"] = inputs["negative_prompt"] + output = sd_pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + # ensure the results are equal + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + # forward with different negative_prompt + inputs = self.get_dummy_inputs(torch_device) + inputs["negative_prompt"] = "negative prompt" + inputs["negative_prompt_2"] = "different negative prompt" + output = sd_pipe(**inputs) + image_slice_3 = output.images[0, -3:, -3:, -1] + + # ensure the results are not equal + assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4 + + def test_stable_diffusion_xl_negative_conditions(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice_with_no_neg_cond = image[0, -3:, -3:, -1] + + image = sd_pipe( + **inputs, + negative_original_size=(512, 512), + negative_crops_coords_top_left=(0, 0), + negative_target_size=(1024, 1024), + ).images + image_slice_with_neg_cond = image[0, -3:, -3:, -1] + + self.assertTrue(np.abs(image_slice_with_no_neg_cond - image_slice_with_neg_cond).max() > 1e-2) + + def test_stable_diffusion_xl_save_from_pretrained(self): + pipes = [] + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**components).to(torch_device) + pipes.append(sd_pipe) + + with tempfile.TemporaryDirectory() as tmpdirname: + sd_pipe.save_pretrained(tmpdirname) + sd_pipe = StableDiffusionXLPipeline.from_pretrained(tmpdirname).to(torch_device) + pipes.append(sd_pipe) + + image_slices = [] + for pipe in pipes: + pipe.unet.set_default_attn_processor() + + inputs = self.get_dummy_inputs(torch_device) + image = pipe(**inputs).images + + image_slices.append(image[0, -3:, -3:, -1].flatten()) + + assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + + def test_pipeline_interrupt(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "hey" + num_inference_steps = 3 + + # store intermediate latents from the generation process + class PipelineState: + def __init__(self): + self.state = [] + + def apply(self, pipe, i, t, callback_kwargs): + self.state.append(callback_kwargs["latents"]) + return callback_kwargs + + pipe_state = PipelineState() + sd_pipe( + prompt, + num_inference_steps=num_inference_steps, + output_type="np", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=pipe_state.apply, + ).images + + # interrupt generation at step index + interrupt_step_idx = 1 + + def callback_on_step_end(pipe, i, t, callback_kwargs): + if i == interrupt_step_idx: + pipe._interrupt = True + + return callback_kwargs + + output_interrupted = sd_pipe( + prompt, + num_inference_steps=num_inference_steps, + output_type="latent", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=callback_on_step_end, + ).images + + # fetch intermediate latents at the interrupted 
step + # from the completed generation process + intermediate_latent = pipe_state.state[interrupt_step_idx] + + # compare the intermediate latent to the output of the interrupted process + # they should be the same + assert torch.allclose(intermediate_latent, output_interrupted, atol=1e-4) + + +@slow +class StableDiffusionXLPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_diffusion_lcm(self): + torch.manual_seed(0) + unet = UNet2DConditionModel.from_pretrained( + "latent-consistency/lcm-ssd-1b", torch_dtype=torch.float16, variant="fp16" + ) + sd_pipe = StableDiffusionXLPipeline.from_pretrained( + "segmind/SSD-1B", unet=unet, torch_dtype=torch.float16, variant="fp16" + ).to(torch_device) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "a red car standing on the side of the street" + + image = sd_pipe( + prompt, num_inference_steps=4, guidance_scale=8.0, generator=torch.Generator("cpu").manual_seed(0) + ).images[0] + expected_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_full/stable_diffusion_ssd_1b_lcm.png" + ) + + image = sd_pipe.image_processor.pil_to_numpy(image) + expected_image = sd_pipe.image_processor.pil_to_numpy(expected_image) + + max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten()) + + assert max_diff < 1e-2 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..3d72270dda5c7e89cae1ad75bc0ebaff038d86cb --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py @@ -0,0 +1,671 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
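+# For orientation, a rough sketch of how the pipeline under test is normally wired
+# (illustrative only, not executed by this module; both checkpoint names are
+# hypothetical placeholders, not real model ids):
+#
+#     adapter = T2IAdapter.from_pretrained("<t2i-adapter-checkpoint>")  # hypothetical id
+#     pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
+#         "<sdxl-checkpoint>", adapter=adapter  # hypothetical id
+#     )
+#     image = pipe("a prompt", image=condition_image).images[0]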
+
+import random
+import unittest
+
+import numpy as np
+import torch
+from parameterized import parameterized
+from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
+
+import diffusers
+from diffusers import (
+    AutoencoderKL,
+    EulerDiscreteScheduler,
+    LCMScheduler,
+    MultiAdapter,
+    StableDiffusionXLAdapterPipeline,
+    T2IAdapter,
+    UNet2DConditionModel,
+)
+from diffusers.utils import logging
+
+from ...testing_utils import (
+    enable_full_determinism,
+    floats_tensor,
+    torch_device,
+)
+from ..pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS
+from ..test_pipelines_common import (
+    IPAdapterTesterMixin,
+    PipelineTesterMixin,
+    assert_mean_pixel_difference,
+)
+
+
+enable_full_determinism()
+
+
+class StableDiffusionXLAdapterPipelineFastTests(IPAdapterTesterMixin, PipelineTesterMixin, unittest.TestCase):
+    pipeline_class = StableDiffusionXLAdapterPipeline
+    params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS
+    batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS
+
+    def get_dummy_components(self, adapter_type="full_adapter_xl", time_cond_proj_dim=None):
+        torch.manual_seed(0)
+        unet = UNet2DConditionModel(
+            block_out_channels=(32, 64),
+            layers_per_block=2,
+            sample_size=32,
+            in_channels=4,
+            out_channels=4,
+            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
+            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
+            # SD2-specific config below
+            attention_head_dim=(2, 4),
+            use_linear_projection=True,
+            addition_embed_type="text_time",
+            addition_time_embed_dim=8,
+            transformer_layers_per_block=(1, 2),
+            projection_class_embeddings_input_dim=80,  # 6 * 8 + 32
+            cross_attention_dim=64,
+            time_cond_proj_dim=time_cond_proj_dim,
+        )
+        scheduler = EulerDiscreteScheduler(
+            beta_start=0.00085,
+            beta_end=0.012,
+            steps_offset=1,
+            beta_schedule="scaled_linear",
+            timestep_spacing="leading",
+        )
+        torch.manual_seed(0)
+        vae = AutoencoderKL(
+            block_out_channels=[32, 64],
+            in_channels=3,
+            out_channels=3,
+            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
+            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
+            latent_channels=4,
+            sample_size=128,
+        )
+        torch.manual_seed(0)
+        text_encoder_config = CLIPTextConfig(
+            bos_token_id=0,
+            eos_token_id=2,
+            hidden_size=32,
+            intermediate_size=37,
+            layer_norm_eps=1e-05,
+            num_attention_heads=4,
+            num_hidden_layers=5,
+            pad_token_id=1,
+            vocab_size=1000,
+            # SD2-specific config below
+            hidden_act="gelu",
+            projection_dim=32,
+        )
+        text_encoder = CLIPTextModel(text_encoder_config)
+        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+
+        text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config)
+        tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+        if adapter_type == "full_adapter_xl":
+            adapter = T2IAdapter(
+                in_channels=3,
+                channels=[32, 64],
+                num_res_blocks=2,
+                downscale_factor=4,
+                adapter_type=adapter_type,
+            )
+        elif adapter_type == "multi_adapter":
+            adapter = MultiAdapter(
+                [
+                    T2IAdapter(
+                        in_channels=3,
+                        channels=[32, 64],
+                        num_res_blocks=2,
+                        downscale_factor=4,
+                        adapter_type="full_adapter_xl",
+                    ),
+                    T2IAdapter(
+                        in_channels=3,
+                        channels=[32, 64],
+                        num_res_blocks=2,
+                        downscale_factor=4,
+                        adapter_type="full_adapter_xl",
+                    ),
+                ]
+            )
+        else:
+            raise ValueError(
+                f"Unknown adapter type: {adapter_type}, must be one of 'full_adapter_xl' or 'multi_adapter'"
+            )
+
+        components = {
+            "adapter": adapter,
+            "unet": unet,
+            "scheduler": scheduler,
+ "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + # "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_components_with_full_downscaling(self, adapter_type="full_adapter_xl"): + """Get dummy components with x8 VAE downscaling and 3 UNet down blocks. + These dummy components are intended to fully-exercise the T2I-Adapter + downscaling behavior. + """ + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=2, + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=1, + projection_class_embeddings_input_dim=80, # 6 * 8 + 32 + cross_attention_dim=64, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 32, 32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=32, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + if adapter_type == "full_adapter_xl": + adapter = T2IAdapter( + in_channels=3, + channels=[32, 32, 64], + num_res_blocks=2, + downscale_factor=16, + adapter_type=adapter_type, + ) + elif adapter_type == "multi_adapter": + adapter = MultiAdapter( + [ + T2IAdapter( + in_channels=3, + channels=[32, 32, 64], + num_res_blocks=2, + downscale_factor=16, + adapter_type="full_adapter_xl", + ), + T2IAdapter( + in_channels=3, + channels=[32, 32, 64], + num_res_blocks=2, + downscale_factor=16, + adapter_type="full_adapter_xl", + ), + ] + ) + else: + raise ValueError( + f"Unknown adapter type: {adapter_type}, must be one of 'full_adapter_xl', or 'multi_adapter''" + ) + + components = { + "adapter": adapter, + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + # "safety_checker": None, + "feature_extractor": None, + "image_encoder": None, + } + return components + + def get_dummy_inputs(self, device, seed=0, height=64, width=64, num_images=1): + if num_images == 1: + image = floats_tensor((1, 3, height, width), rng=random.Random(seed)).to(device) + else: + image = [ + floats_tensor((1, 3, height, width), rng=random.Random(seed)).to(device) for _ in 
range(num_images) + ] + + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + } + return inputs + + def test_ip_adapter(self, from_multi=False, expected_pipe_slice=None): + if not from_multi: + expected_pipe_slice = None + if torch_device == "cpu": + expected_pipe_slice = np.array( + [0.5752, 0.6155, 0.4826, 0.5111, 0.5741, 0.4678, 0.5199, 0.5231, 0.4794] + ) + + return super().test_ip_adapter(expected_pipe_slice=expected_pipe_slice) + + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + + def test_stable_diffusion_adapter_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLAdapterPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([00.5752, 0.6155, 0.4826, 0.5111, 0.5741, 0.4678, 0.5199, 0.5231, 0.4794]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3 + + @parameterized.expand( + [ + # (dim=144) The internal feature map will be 9x9 after initial pixel unshuffling (downscaled x16). + (((4 * 2 + 1) * 16),), + # (dim=160) The internal feature map will be 5x5 after the first T2I down block (downscaled x32). + (((4 * 1 + 1) * 32),), + ] + ) + def test_multiple_image_dimensions(self, dim): + """Test that the T2I-Adapter pipeline supports any input dimension that + is divisible by the adapter's `downscale_factor`. This test was added in + response to an issue where the T2I Adapter's downscaling padding + behavior did not match the UNet's behavior. + + Note that we have selected `dim` values to produce odd resolutions at + each downscaling level. + """ + components = self.get_dummy_components_with_full_downscaling() + sd_pipe = StableDiffusionXLAdapterPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device, height=dim, width=dim) + image = sd_pipe(**inputs).images + + assert image.shape == (1, dim, dim, 3) + + @parameterized.expand(["full_adapter", "full_adapter_xl", "light_adapter"]) + def test_total_downscale_factor(self, adapter_type): + """Test that the T2IAdapter correctly reports its total_downscale_factor.""" + batch_size = 1 + in_channels = 3 + out_channels = [320, 640, 1280, 1280] + in_image_size = 512 + + adapter = T2IAdapter( + in_channels=in_channels, + channels=out_channels, + num_res_blocks=2, + downscale_factor=8, + adapter_type=adapter_type, + ) + adapter.to(torch_device) + + in_image = floats_tensor((batch_size, in_channels, in_image_size, in_image_size)).to(torch_device) + + adapter_state = adapter(in_image) + + # Assume that the last element in `adapter_state` has been downsampled the most, and check + # that it matches the `total_downscale_factor`. 
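+        # Concretely: if `total_downscale_factor` were, say, 16, the 512-px input
+        # above would have to come out as a 512 // 16 = 32 px feature map per side;
+        # the shape assert below encodes exactly that arithmetic for whatever
+        # factor the adapter reports.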
+ expected_out_image_size = in_image_size // adapter.total_downscale_factor + assert adapter_state[-1].shape == ( + batch_size, + out_channels[-1], + expected_out_image_size, + expected_out_image_size, + ) + + def test_adapter_sdxl_lcm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionXLAdapterPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5425, 0.5385, 0.4964, 0.5045, 0.6149, 0.4974, 0.5469, 0.5332, 0.5426]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_adapter_sdxl_lcm_custom_timesteps(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionXLAdapterPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["num_inference_steps"] + inputs["timesteps"] = [999, 499] + output = sd_pipe(**inputs) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5425, 0.5385, 0.4964, 0.5045, 0.6149, 0.4974, 0.5469, 0.5332, 0.5426]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + +class StableDiffusionXLMultiAdapterPipelineFastTests( + StableDiffusionXLAdapterPipelineFastTests, PipelineTesterMixin, unittest.TestCase +): + supports_dduf = False + + def get_dummy_components(self, time_cond_proj_dim=None): + return super().get_dummy_components("multi_adapter", time_cond_proj_dim=time_cond_proj_dim) + + def get_dummy_components_with_full_downscaling(self): + return super().get_dummy_components_with_full_downscaling("multi_adapter") + + def get_dummy_inputs(self, device, seed=0, height=64, width=64): + inputs = super().get_dummy_inputs(device, seed, height, width, num_images=2) + inputs["adapter_conditioning_scale"] = [0.5, 0.5] + return inputs + + def test_stable_diffusion_adapter_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLAdapterPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5617, 0.6081, 0.4807, 0.5071, 0.5665, 0.4614, 0.5165, 0.5164, 0.4786]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3 + + def test_ip_adapter(self): + expected_pipe_slice = None + if torch_device == "cpu": + expected_pipe_slice = np.array([0.5617, 0.6081, 0.4807, 0.5071, 0.5665, 0.4614, 0.5165, 0.5164, 0.4786]) + + return super().test_ip_adapter(from_multi=True, expected_pipe_slice=expected_pipe_slice) + + def test_inference_batch_consistent( + self, batch_sizes=[2, 4, 13], additional_params_copy_to_batched_inputs=["num_inference_steps"] + ): + 
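+        # A note on the batching scheme that follows: prompts are deliberately
+        # truncated to unequal lengths (with the last one made very long) so the
+        # batch exercises mixed sequence lengths, while each conditioning image is
+        # replicated `batch_size` times so that every per-adapter image list in the
+        # multi-adapter setup matches the prompt batch size.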
components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + logger = logging.get_logger(pipe.__module__) + logger.setLevel(level=diffusers.logging.FATAL) + + # batchify inputs + for batch_size in batch_sizes: + batched_inputs = {} + for name, value in inputs.items(): + if name in self.batch_params: + # prompt is string + if name == "prompt": + len_prompt = len(value) + # make unequal batch sizes + batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)] + + # make last batch super long + batched_inputs[name][-1] = 100 * "very long" + elif name == "image": + batched_images = [] + + for image in value: + batched_images.append(batch_size * [image]) + + batched_inputs[name] = batched_images + else: + batched_inputs[name] = batch_size * [value] + + elif name == "batch_size": + batched_inputs[name] = batch_size + else: + batched_inputs[name] = value + + for arg in additional_params_copy_to_batched_inputs: + batched_inputs[arg] = inputs[arg] + + batched_inputs["output_type"] = "np" + + output = pipe(**batched_inputs) + + assert len(output[0]) == batch_size + + batched_inputs["output_type"] = "np" + + output = pipe(**batched_inputs)[0] + + assert output.shape[0] == batch_size + + logger.setLevel(level=diffusers.logging.WARNING) + + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + + def test_num_images_per_prompt(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + batch_sizes = [1, 2] + num_images_per_prompts = [1, 2] + + for batch_size in batch_sizes: + for num_images_per_prompt in num_images_per_prompts: + inputs = self.get_dummy_inputs(torch_device) + + for key in inputs.keys(): + if key in self.batch_params: + if key == "image": + batched_images = [] + + for image in inputs[key]: + batched_images.append(batch_size * [image]) + + inputs[key] = batched_images + else: + inputs[key] = batch_size * [inputs[key]] + + images = pipe(**inputs, num_images_per_prompt=num_images_per_prompt)[0] + + assert images.shape[0] == batch_size * num_images_per_prompt + + def test_inference_batch_single_identical( + self, + batch_size=3, + test_max_difference=None, + test_mean_pixel_difference=None, + relax_max_difference=False, + expected_max_diff=2e-3, + additional_params_copy_to_batched_inputs=["num_inference_steps"], + ): + if test_max_difference is None: + # TODO(Pedro) - not sure why, but not at all reproducible at the moment it seems + # make sure that batched and non-batched is identical + test_max_difference = torch_device != "mps" + + if test_mean_pixel_difference is None: + # TODO same as above + test_mean_pixel_difference = torch_device != "mps" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + logger = logging.get_logger(pipe.__module__) + logger.setLevel(level=diffusers.logging.FATAL) + + # batchify inputs + batched_inputs = {} + for name, value in inputs.items(): + if name in self.batch_params: + # prompt is string + if name == "prompt": + len_prompt = len(value) + # make unequal batch sizes + batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)] + + # make 
last batch super long + batched_inputs[name][-1] = 100 * "very long" + elif name == "image": + batched_images = [] + + for image in value: + batched_images.append(batch_size * [image]) + + batched_inputs[name] = batched_images + else: + batched_inputs[name] = batch_size * [value] + elif name == "batch_size": + batched_inputs[name] = batch_size + elif name == "generator": + batched_inputs[name] = [self.get_generator(i) for i in range(batch_size)] + else: + batched_inputs[name] = value + + for arg in additional_params_copy_to_batched_inputs: + batched_inputs[arg] = inputs[arg] + + output_batch = pipe(**batched_inputs) + assert output_batch[0].shape[0] == batch_size + + inputs["generator"] = self.get_generator(0) + + output = pipe(**inputs) + + logger.setLevel(level=diffusers.logging.WARNING) + if test_max_difference: + if relax_max_difference: + # Taking the median of the largest differences + # is resilient to outliers + diff = np.abs(output_batch[0][0] - output[0][0]) + diff = diff.flatten() + diff.sort() + max_diff = np.median(diff[-5:]) + else: + max_diff = np.abs(output_batch[0][0] - output[0][0]).max() + assert max_diff < expected_max_diff + + if test_mean_pixel_difference: + assert_mean_pixel_difference(output_batch[0][0], output[0][0]) + + def test_adapter_sdxl_lcm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionXLAdapterPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + output = sd_pipe(**inputs) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5313, 0.5375, 0.4942, 0.5021, 0.6142, 0.4968, 0.5434, 0.5311, 0.5448]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_adapter_sdxl_lcm_custom_timesteps(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionXLAdapterPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.scheduler.config) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["num_inference_steps"] + inputs["timesteps"] = [999, 499] + output = sd_pipe(**inputs) + image = output.images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.5313, 0.5375, 0.4942, 0.5021, 0.6142, 0.4968, 0.5434, 0.5311, 0.5448]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..c5499847069fcae0337abc69d1e3aba63cbba050 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py @@ -0,0 +1,717 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import random +import unittest + +import numpy as np +import torch +from transformers import ( + CLIPImageProcessor, + CLIPTextConfig, + CLIPTextModel, + CLIPTextModelWithProjection, + CLIPTokenizer, + CLIPVisionConfig, + CLIPVisionModelWithProjection, +) + +from diffusers import ( + AutoencoderKL, + AutoencoderTiny, + EDMDPMSolverMultistepScheduler, + EulerDiscreteScheduler, + LCMScheduler, + StableDiffusionXLImg2ImgPipeline, + UNet2DConditionModel, +) + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + floats_tensor, + load_image, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import ( + IPAdapterTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionXLImg2ImgPipelineFastTests( + IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableDiffusionXLImg2ImgPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width"} + required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"} + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union( + {"add_text_embeds", "add_time_ids", "add_neg_time_ids"} + ) + + supports_dduf = False + + def get_dummy_components(self, skip_first_text_encoder=False, time_cond_proj_dim=None): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + time_cond_proj_dim=time_cond_proj_dim, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + projection_class_embeddings_input_dim=72, # 5 * 8 + 32 + cross_attention_dim=64 if not skip_first_text_encoder else 32, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + image_encoder_config = CLIPVisionConfig( + hidden_size=32, + image_size=224, + projection_dim=32, + intermediate_size=37, + num_attention_heads=4, + num_channels=3, + num_hidden_layers=5, + patch_size=14, + ) + + image_encoder = CLIPVisionModelWithProjection(image_encoder_config) + + feature_extractor 
= CLIPImageProcessor( + crop_size=224, + do_center_crop=True, + do_normalize=True, + do_resize=True, + image_mean=[0.48145466, 0.4578275, 0.40821073], + image_std=[0.26862954, 0.26130258, 0.27577711], + resample=3, + size=224, + ) + + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=32, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder if not skip_first_text_encoder else None, + "tokenizer": tokenizer if not skip_first_text_encoder else None, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + "requires_aesthetics_score": True, + "image_encoder": image_encoder, + "feature_extractor": feature_extractor, + } + return components + + def get_dummy_tiny_autoencoder(self): + return AutoencoderTiny(in_channels=3, out_channels=3, latent_channels=4) + + def test_components_function(self): + init_components = self.get_dummy_components() + init_components.pop("requires_aesthetics_score") + pipe = self.pipeline_class(**init_components) + + self.assertTrue(hasattr(pipe, "components")) + self.assertTrue(set(pipe.components.keys()) == set(init_components.keys())) + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image / 2 + 0.5 + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + "strength": 0.8, + } + return inputs + + def test_stable_diffusion_xl_img2img_euler(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + + expected_slice = np.array([0.4664, 0.4886, 0.4403, 0.6902, 0.5592, 0.4534, 0.5931, 0.5951, 0.5224]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_xl_img2img_euler_lcm(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + + expected_slice = np.array([0.5604, 0.4352, 0.4717, 0.5844, 0.5101, 0.6704, 0.6290, 0.5460, 0.5286]) + + assert 
np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_xl_img2img_euler_lcm_custom_timesteps(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(time_cond_proj_dim=256) + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.config) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + del inputs["num_inference_steps"] + inputs["timesteps"] = [999, 499] + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + + expected_slice = np.array([0.5604, 0.4352, 0.4717, 0.5844, 0.5101, 0.6704, 0.6290, 0.5460, 0.5286]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_attention_slicing_forward_pass(self): + super().test_attention_slicing_forward_pass(expected_max_diff=3e-3) + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + @unittest.skip("Skip for now.") + def test_save_load_optional_components(self): + pass + + def test_ip_adapter(self): + expected_pipe_slice = None + if torch_device == "cpu": + expected_pipe_slice = np.array([0.5133, 0.4626, 0.4970, 0.6273, 0.5160, 0.6891, 0.6639, 0.5892, 0.5709]) + + return super().test_ip_adapter(expected_pipe_slice=expected_pipe_slice) + + def test_stable_diffusion_xl_img2img_tiny_autoencoder(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe.vae = self.get_dummy_tiny_autoencoder() + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1].flatten() + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.0, 0.0, 0.0106, 0.0, 0.0, 0.0087, 0.0052, 0.0062, 0.0177]) + + assert np.allclose(image_slice, expected_slice, atol=1e-4, rtol=1e-4) + + @require_torch_accelerator + def test_stable_diffusion_xl_offloads(self): + pipes = [] + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components).to(torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe.enable_model_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe.enable_sequential_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + image_slices = [] + for pipe in pipes: + pipe.unet.set_default_attn_processor() + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + image = pipe(**inputs).images + + image_slices.append(image[0, -3:, -3:, -1].flatten()) + + assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3 + + def test_stable_diffusion_xl_multi_prompts(self): + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components).to(torch_device) + + # forward with single prompt + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + inputs["num_inference_steps"] = 5 + output = sd_pipe(**inputs) + image_slice_1 = 
output.images[0, -3:, -3:, -1] + + # forward with same prompt duplicated + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + inputs["num_inference_steps"] = 5 + inputs["prompt_2"] = inputs["prompt"] + output = sd_pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + # ensure the results are equal + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + # forward with different prompt + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + inputs["num_inference_steps"] = 5 + inputs["prompt_2"] = "different prompt" + output = sd_pipe(**inputs) + image_slice_3 = output.images[0, -3:, -3:, -1] + + # ensure the results are not equal + assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4 + + # manually set a negative_prompt + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + inputs["num_inference_steps"] = 5 + inputs["negative_prompt"] = "negative prompt" + output = sd_pipe(**inputs) + image_slice_1 = output.images[0, -3:, -3:, -1] + + # forward with same negative_prompt duplicated + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + inputs["num_inference_steps"] = 5 + inputs["negative_prompt"] = "negative prompt" + inputs["negative_prompt_2"] = inputs["negative_prompt"] + output = sd_pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + # ensure the results are equal + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + # forward with different negative_prompt + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + inputs["num_inference_steps"] = 5 + inputs["negative_prompt"] = "negative prompt" + inputs["negative_prompt_2"] = "different negative prompt" + output = sd_pipe(**inputs) + image_slice_3 = output.images[0, -3:, -3:, -1] + + # ensure the results are not equal + assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4 + + def test_stable_diffusion_xl_img2img_negative_conditions(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + sd_pipe = self.pipeline_class(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice_with_no_neg_conditions = image[0, -3:, -3:, -1] + + image = sd_pipe( + **inputs, + negative_original_size=(512, 512), + negative_crops_coords_top_left=( + 0, + 0, + ), + negative_target_size=(1024, 1024), + ).images + image_slice_with_neg_conditions = image[0, -3:, -3:, -1] + + assert ( + np.abs(image_slice_with_no_neg_conditions.flatten() - image_slice_with_neg_conditions.flatten()).max() + > 1e-4 + ) + + def test_pipeline_interrupt(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + prompt = "hey" + num_inference_steps = 5 + + # store intermediate latents from the generation process + class PipelineState: + def __init__(self): + self.state = [] + + def apply(self, pipe, i, t, callback_kwargs): + self.state.append(callback_kwargs["latents"]) + return callback_kwargs + + pipe_state = PipelineState() + sd_pipe( + prompt, + image=inputs["image"], + strength=0.8, + num_inference_steps=num_inference_steps, + output_type="np", 
+ generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=pipe_state.apply, + ).images + + # interrupt generation at step index + interrupt_step_idx = 1 + + def callback_on_step_end(pipe, i, t, callback_kwargs): + if i == interrupt_step_idx: + pipe._interrupt = True + + return callback_kwargs + + output_interrupted = sd_pipe( + prompt, + image=inputs["image"], + strength=0.8, + num_inference_steps=num_inference_steps, + output_type="latent", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=callback_on_step_end, + ).images + + # fetch intermediate latents at the interrupted step + # from the completed generation process + intermediate_latent = pipe_state.state[interrupt_step_idx] + + # compare the intermediate latent to the output of the interrupted process + # they should be the same + assert torch.allclose(intermediate_latent, output_interrupted, atol=1e-4) + + +class StableDiffusionXLImg2ImgRefinerOnlyPipelineFastTests( + PipelineLatentTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableDiffusionXLImg2ImgPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width"} + required_optional_params = PipelineTesterMixin.required_optional_params - {"latents"} + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + + def get_dummy_components(self): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + projection_class_embeddings_input_dim=72, # 5 * 8 + 32 + cross_attention_dim=32, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=32, + ) + text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "tokenizer": None, + "text_encoder": None, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + "requires_aesthetics_score": True, + "image_encoder": None, + "feature_extractor": None, + } + return components + + def test_components_function(self): + init_components = self.get_dummy_components() + init_components.pop("requires_aesthetics_score") + pipe = self.pipeline_class(**init_components) + + self.assertTrue(hasattr(pipe, "components")) + self.assertTrue(set(pipe.components.keys()) == set(init_components.keys())) + + def 
get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image / 2 + 0.5 + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "output_type": "np", + "strength": 0.8, + } + return inputs + + def test_stable_diffusion_xl_img2img_euler(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + + expected_slice = np.array([0.4745, 0.4924, 0.4338, 0.6468, 0.5547, 0.4419, 0.5646, 0.5897, 0.5146]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + @require_torch_accelerator + def test_stable_diffusion_xl_offloads(self): + pipes = [] + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components).to(torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe.enable_model_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) + sd_pipe.enable_sequential_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + image_slices = [] + for pipe in pipes: + pipe.unet.set_default_attn_processor() + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + image = pipe(**inputs).images + + image_slices.append(image[0, -3:, -3:, -1].flatten()) + + assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3 + + def test_stable_diffusion_xl_img2img_negative_conditions(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + + sd_pipe = self.pipeline_class(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice_with_no_neg_conditions = image[0, -3:, -3:, -1] + + image = sd_pipe( + **inputs, + negative_original_size=(512, 512), + negative_crops_coords_top_left=( + 0, + 0, + ), + negative_target_size=(1024, 1024), + ).images + image_slice_with_neg_conditions = image[0, -3:, -3:, -1] + + assert ( + np.abs(image_slice_with_no_neg_conditions.flatten() - image_slice_with_neg_conditions.flatten()).max() + > 1e-4 + ) + + def test_attention_slicing_forward_pass(self): + super().test_attention_slicing_forward_pass(expected_max_diff=3e-3) + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + + +@slow +class StableDiffusionXLImg2ImgPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + 
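+        # These hooks force garbage collection and empty the accelerator cache
+        # because the integration tests below load full fp16 checkpoints; this
+        # keeps one test's allocations from skewing the next.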
super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_diffusion_xl_img2img_playground(self): + torch.manual_seed(0) + model_path = "playgroundai/playground-v2.5-1024px-aesthetic" + + sd_pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained( + model_path, torch_dtype=torch.float16, variant="fp16", add_watermarker=False + ) + + sd_pipe.enable_model_cpu_offload() + sd_pipe.scheduler = EDMDPMSolverMultistepScheduler.from_config( + sd_pipe.scheduler.config, use_karras_sigmas=True + ) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "a photo of an astronaut riding a horse on mars" + + url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png" + + init_image = load_image(url).convert("RGB") + + image = sd_pipe( + prompt, + num_inference_steps=30, + guidance_scale=8.0, + image=init_image, + height=1024, + width=1024, + output_type="np", + ).images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 1024, 1024, 3) + + expected_slice = np.array([0.3519, 0.3149, 0.3364, 0.3505, 0.3402, 0.3371, 0.3554, 0.3495, 0.3333]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_inpaint.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..d3f5779c7633eedb0ddeacc6286c5adcda8f1a37 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_inpaint.py @@ -0,0 +1,826 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
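+# For orientation, the tests below follow the usual diffusers inpainting call
+# convention, sketched here (illustrative only, not executed; the checkpoint name
+# is a hypothetical placeholder):
+#
+#     pipe = StableDiffusionXLInpaintPipeline.from_pretrained("<sdxl-inpaint-checkpoint>")
+#     # white mask pixels are repainted, black mask pixels are preserved
+#     image = pipe("a prompt", image=init_image, mask_image=mask_image).images[0]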
+ +import copy +import random +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import ( + CLIPImageProcessor, + CLIPTextConfig, + CLIPTextModel, + CLIPTextModelWithProjection, + CLIPTokenizer, + CLIPVisionConfig, + CLIPVisionModelWithProjection, +) + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + LCMScheduler, + StableDiffusionXLInpaintPipeline, + UNet2DConditionModel, + UniPCMultistepScheduler, +) + +from ...testing_utils import ( + enable_full_determinism, + floats_tensor, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import ( + TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_INPAINTING_PARAMS, + TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS, +) +from ..test_pipelines_common import IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin + + +enable_full_determinism() + + +class StableDiffusionXLInpaintPipelineFastTests( + IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableDiffusionXLInpaintPipeline + params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS + batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS + image_params = frozenset([]) + # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess + image_latents_params = frozenset([]) + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union( + { + "add_text_embeds", + "add_time_ids", + "mask", + "masked_image_latents", + } + ) + + supports_dduf = False + + def get_dummy_components(self, skip_first_text_encoder=False, time_cond_proj_dim=None): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + time_cond_proj_dim=time_cond_proj_dim, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + projection_class_embeddings_input_dim=72, # 5 * 8 + 32 + cross_attention_dim=64 if not skip_first_text_encoder else 32, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=32, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + torch.manual_seed(0) + image_encoder_config = CLIPVisionConfig( + hidden_size=32, + image_size=224, + projection_dim=32, + intermediate_size=37, 
+ num_attention_heads=4, + num_channels=3, + num_hidden_layers=5, + patch_size=14, + ) + + image_encoder = CLIPVisionModelWithProjection(image_encoder_config) + + feature_extractor = CLIPImageProcessor( + crop_size=224, + do_center_crop=True, + do_normalize=True, + do_resize=True, + image_mean=[0.48145466, 0.4578275, 0.40821073], + image_std=[0.26862954, 0.26130258, 0.27577711], + resample=3, + size=224, + ) + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder if not skip_first_text_encoder else None, + "tokenizer": tokenizer if not skip_first_text_encoder else None, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + "image_encoder": image_encoder, + "feature_extractor": feature_extractor, + "requires_aesthetics_score": True, + } + return components + + def get_dummy_inputs(self, device, seed=0): + # TODO: use tensor inputs instead of PIL, this is here just to leave the old expected_slices untouched + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = image.cpu().permute(0, 2, 3, 1)[0] + init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + # create mask + image[8:, 8:, :] = 255 + mask_image = Image.fromarray(np.uint8(image)).convert("L").resize((64, 64)) + + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": init_image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "strength": 1.0, + "output_type": "np", + } + return inputs + + def get_dummy_inputs_2images(self, device, seed=0, img_res=64): + # Get random floats in [0, 1] as image with spatial size (img_res, img_res) + image1 = floats_tensor((1, 3, img_res, img_res), rng=random.Random(seed)).to(device) + image2 = floats_tensor((1, 3, img_res, img_res), rng=random.Random(seed + 22)).to(device) + # Convert images to [-1, 1] + init_image1 = 2.0 * image1 - 1.0 + init_image2 = 2.0 * image2 - 1.0 + + # empty mask + mask_image = torch.zeros((1, 1, img_res, img_res), device=device) + + if str(device).startswith("mps"): + generator1 = torch.manual_seed(seed) + generator2 = torch.manual_seed(seed) + else: + generator1 = torch.Generator(device=device).manual_seed(seed) + generator2 = torch.Generator(device=device).manual_seed(seed) + + inputs = { + "prompt": ["A painting of a squirrel eating a burger"] * 2, + "image": [init_image1, init_image2], + "mask_image": [mask_image] * 2, + "generator": [generator1, generator2], + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "np", + } + return inputs + + def test_ip_adapter(self): + expected_pipe_slice = None + if torch_device == "cpu": + expected_pipe_slice = np.array([0.8274, 0.5538, 0.6141, 0.5843, 0.6865, 0.7082, 0.5861, 0.6123, 0.5344]) + + return super().test_ip_adapter(expected_pipe_slice=expected_pipe_slice) + + def test_components_function(self): + init_components = self.get_dummy_components() + init_components.pop("requires_aesthetics_score") + pipe = self.pipeline_class(**init_components) + + self.assertTrue(hasattr(pipe, "components")) + self.assertTrue(set(pipe.components.keys()) == set(init_components.keys())) + + def test_stable_diffusion_xl_inpaint_euler(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = 
StableDiffusionXLInpaintPipeline(**components)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = sd_pipe(**inputs).images
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 64, 64, 3)
+
+        expected_slice = np.array([0.8279, 0.5673, 0.6088, 0.6156, 0.6923, 0.7347, 0.6547, 0.6108, 0.5198])
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+
+    def test_stable_diffusion_xl_inpaint_euler_lcm(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components(time_cond_proj_dim=256)
+        sd_pipe = StableDiffusionXLInpaintPipeline(**components)
+        sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.config)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = sd_pipe(**inputs).images
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 64, 64, 3)
+
+        expected_slice = np.array([0.6611, 0.5569, 0.5531, 0.5471, 0.5918, 0.6393, 0.5074, 0.5468, 0.5185])
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+
+    def test_stable_diffusion_xl_inpaint_euler_lcm_custom_timesteps(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components(time_cond_proj_dim=256)
+        sd_pipe = StableDiffusionXLInpaintPipeline(**components)
+        sd_pipe.scheduler = LCMScheduler.from_config(sd_pipe.config)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        del inputs["num_inference_steps"]
+        inputs["timesteps"] = [999, 499]
+        image = sd_pipe(**inputs).images
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 64, 64, 3)
+
+        expected_slice = np.array([0.6611, 0.5569, 0.5531, 0.5471, 0.5918, 0.6393, 0.5074, 0.5468, 0.5185])
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+
+    def test_attention_slicing_forward_pass(self):
+        super().test_attention_slicing_forward_pass(expected_max_diff=3e-3)
+
+    def test_inference_batch_single_identical(self):
+        super().test_inference_batch_single_identical(expected_max_diff=3e-3)
+
+    @unittest.skip("Skip for now.")
+    def test_save_load_optional_components(self):
+        pass
+
+    @require_torch_accelerator
+    def test_stable_diffusion_xl_inpaint_negative_prompt_embeds(self):
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionXLInpaintPipeline(**components)
+        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        # forward without prompt embeds
+        inputs = self.get_dummy_inputs(torch_device)
+        negative_prompt = 3 * ["this is a negative prompt"]
+        inputs["negative_prompt"] = negative_prompt
+        inputs["prompt"] = 3 * [inputs["prompt"]]
+
+        output = sd_pipe(**inputs)
+        image_slice_1 = output.images[0, -3:, -3:, -1]
+
+        # forward with prompt embeds
+        inputs = self.get_dummy_inputs(torch_device)
+        negative_prompt = 3 * ["this is a negative prompt"]
+        prompt = 3 * [inputs.pop("prompt")]
+
+        (
+            prompt_embeds,
+            negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
+        ) = sd_pipe.encode_prompt(prompt, negative_prompt=negative_prompt)
+
+        output = sd_pipe(
+            **inputs,
+            prompt_embeds=prompt_embeds,
+            negative_prompt_embeds=negative_prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, + ) + image_slice_2 = output.images[0, -3:, -3:, -1] + + # make sure that it's equal + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + @require_torch_accelerator + def test_stable_diffusion_xl_offloads(self): + pipes = [] + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLInpaintPipeline(**components).to(torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLInpaintPipeline(**components) + sd_pipe.enable_model_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLInpaintPipeline(**components) + sd_pipe.enable_sequential_cpu_offload(device=torch_device) + pipes.append(sd_pipe) + + image_slices = [] + for pipe in pipes: + pipe.unet.set_default_attn_processor() + + inputs = self.get_dummy_inputs(torch_device) + image = pipe(**inputs).images + + image_slices.append(image[0, -3:, -3:, -1].flatten()) + + assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3 + + def test_stable_diffusion_xl_refiner(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components(skip_first_text_encoder=True) + + sd_pipe = self.pipeline_class(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + + expected_slice = np.array([0.7540, 0.5231, 0.5833, 0.6217, 0.6339, 0.7067, 0.6507, 0.5672, 0.5030]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_stable_diffusion_two_xl_mixture_of_denoiser_fast(self): + components = self.get_dummy_components() + pipe_1 = StableDiffusionXLInpaintPipeline(**components).to(torch_device) + pipe_1.unet.set_default_attn_processor() + pipe_2 = StableDiffusionXLInpaintPipeline(**components).to(torch_device) + pipe_2.unet.set_default_attn_processor() + + def assert_run_mixture( + num_steps, split, scheduler_cls_orig, num_train_timesteps=pipe_1.scheduler.config.num_train_timesteps + ): + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = num_steps + + class scheduler_cls(scheduler_cls_orig): + pass + + pipe_1.scheduler = scheduler_cls.from_config(pipe_1.scheduler.config) + pipe_2.scheduler = scheduler_cls.from_config(pipe_2.scheduler.config) + + # Let's retrieve the number of timesteps we want to use + pipe_1.scheduler.set_timesteps(num_steps) + expected_steps = pipe_1.scheduler.timesteps.tolist() + + split_ts = num_train_timesteps - int(round(num_train_timesteps * split)) + + if pipe_1.scheduler.order == 2: + expected_steps_1 = list(filter(lambda ts: ts >= split_ts, expected_steps)) + expected_steps_2 = expected_steps_1[-1:] + list(filter(lambda ts: ts < split_ts, expected_steps)) + expected_steps = expected_steps_1 + expected_steps_2 + else: + expected_steps_1 = list(filter(lambda ts: ts >= split_ts, expected_steps)) + expected_steps_2 = list(filter(lambda ts: ts < split_ts, expected_steps)) + + # now we monkey patch step `done_steps` + # list into the step function for testing + done_steps = [] + old_step = copy.copy(scheduler_cls.step) + + def new_step(self, *args, **kwargs): + done_steps.append(args[1].cpu().item()) # args[1] is always the passed `t` + 
return old_step(self, *args, **kwargs) + + scheduler_cls.step = new_step + + inputs_1 = {**inputs, **{"denoising_end": split, "output_type": "latent"}} + latents = pipe_1(**inputs_1).images[0] + + assert expected_steps_1 == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" + + inputs_2 = {**inputs, **{"denoising_start": split, "image": latents}} + pipe_2(**inputs_2).images[0] + + assert expected_steps_2 == done_steps[len(expected_steps_1) :] + assert expected_steps == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" + + for steps in [7, 20]: + assert_run_mixture(steps, 0.33, EulerDiscreteScheduler) + assert_run_mixture(steps, 0.33, HeunDiscreteScheduler) + + @slow + def test_stable_diffusion_two_xl_mixture_of_denoiser(self): + components = self.get_dummy_components() + pipe_1 = StableDiffusionXLInpaintPipeline(**components).to(torch_device) + pipe_1.unet.set_default_attn_processor() + pipe_2 = StableDiffusionXLInpaintPipeline(**components).to(torch_device) + pipe_2.unet.set_default_attn_processor() + + def assert_run_mixture( + num_steps, split, scheduler_cls_orig, num_train_timesteps=pipe_1.scheduler.config.num_train_timesteps + ): + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = num_steps + + class scheduler_cls(scheduler_cls_orig): + pass + + pipe_1.scheduler = scheduler_cls.from_config(pipe_1.scheduler.config) + pipe_2.scheduler = scheduler_cls.from_config(pipe_2.scheduler.config) + + # Let's retrieve the number of timesteps we want to use + pipe_1.scheduler.set_timesteps(num_steps) + expected_steps = pipe_1.scheduler.timesteps.tolist() + + split_ts = num_train_timesteps - int(round(num_train_timesteps * split)) + + if pipe_1.scheduler.order == 2: + expected_steps_1 = list(filter(lambda ts: ts >= split_ts, expected_steps)) + expected_steps_2 = expected_steps_1[-1:] + list(filter(lambda ts: ts < split_ts, expected_steps)) + expected_steps = expected_steps_1 + expected_steps_2 + else: + expected_steps_1 = list(filter(lambda ts: ts >= split_ts, expected_steps)) + expected_steps_2 = list(filter(lambda ts: ts < split_ts, expected_steps)) + + # now we monkey patch step `done_steps` + # list into the step function for testing + done_steps = [] + old_step = copy.copy(scheduler_cls.step) + + def new_step(self, *args, **kwargs): + done_steps.append(args[1].cpu().item()) # args[1] is always the passed `t` + return old_step(self, *args, **kwargs) + + scheduler_cls.step = new_step + + inputs_1 = {**inputs, **{"denoising_end": split, "output_type": "latent"}} + latents = pipe_1(**inputs_1).images[0] + + assert expected_steps_1 == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" + + inputs_2 = {**inputs, **{"denoising_start": split, "image": latents}} + pipe_2(**inputs_2).images[0] + + assert expected_steps_2 == done_steps[len(expected_steps_1) :] + assert expected_steps == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" + + for steps in [5, 8, 20]: + for split in [0.33, 0.49, 0.71]: + for scheduler_cls in [ + DDIMScheduler, + EulerDiscreteScheduler, + DPMSolverMultistepScheduler, + UniPCMultistepScheduler, + HeunDiscreteScheduler, + ]: + assert_run_mixture(steps, split, scheduler_cls) + + @slow + def test_stable_diffusion_three_xl_mixture_of_denoiser(self): + components = self.get_dummy_components() + pipe_1 = StableDiffusionXLInpaintPipeline(**components).to(torch_device) + pipe_1.unet.set_default_attn_processor() + pipe_2 = 
StableDiffusionXLInpaintPipeline(**components).to(torch_device) + pipe_2.unet.set_default_attn_processor() + pipe_3 = StableDiffusionXLInpaintPipeline(**components).to(torch_device) + pipe_3.unet.set_default_attn_processor() + + def assert_run_mixture( + num_steps, + split_1, + split_2, + scheduler_cls_orig, + num_train_timesteps=pipe_1.scheduler.config.num_train_timesteps, + ): + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = num_steps + + class scheduler_cls(scheduler_cls_orig): + pass + + pipe_1.scheduler = scheduler_cls.from_config(pipe_1.scheduler.config) + pipe_2.scheduler = scheduler_cls.from_config(pipe_2.scheduler.config) + pipe_3.scheduler = scheduler_cls.from_config(pipe_3.scheduler.config) + + # Let's retrieve the number of timesteps we want to use + pipe_1.scheduler.set_timesteps(num_steps) + expected_steps = pipe_1.scheduler.timesteps.tolist() + + split_1_ts = num_train_timesteps - int(round(num_train_timesteps * split_1)) + split_2_ts = num_train_timesteps - int(round(num_train_timesteps * split_2)) + + if pipe_1.scheduler.order == 2: + expected_steps_1 = list(filter(lambda ts: ts >= split_1_ts, expected_steps)) + expected_steps_2 = expected_steps_1[-1:] + list( + filter(lambda ts: ts >= split_2_ts and ts < split_1_ts, expected_steps) + ) + expected_steps_3 = expected_steps_2[-1:] + list(filter(lambda ts: ts < split_2_ts, expected_steps)) + expected_steps = expected_steps_1 + expected_steps_2 + expected_steps_3 + else: + expected_steps_1 = list(filter(lambda ts: ts >= split_1_ts, expected_steps)) + expected_steps_2 = list(filter(lambda ts: ts >= split_2_ts and ts < split_1_ts, expected_steps)) + expected_steps_3 = list(filter(lambda ts: ts < split_2_ts, expected_steps)) + + # now we monkey patch step `done_steps` + # list into the step function for testing + done_steps = [] + old_step = copy.copy(scheduler_cls.step) + + def new_step(self, *args, **kwargs): + done_steps.append(args[1].cpu().item()) # args[1] is always the passed `t` + return old_step(self, *args, **kwargs) + + scheduler_cls.step = new_step + + inputs_1 = {**inputs, **{"denoising_end": split_1, "output_type": "latent"}} + latents = pipe_1(**inputs_1).images[0] + + assert expected_steps_1 == done_steps, ( + f"Failure with {scheduler_cls.__name__} and {num_steps} and {split_1} and {split_2}" + ) + + inputs_2 = { + **inputs, + **{"denoising_start": split_1, "denoising_end": split_2, "image": latents, "output_type": "latent"}, + } + pipe_2(**inputs_2).images[0] + + assert expected_steps_2 == done_steps[len(expected_steps_1) :] + + inputs_3 = {**inputs, **{"denoising_start": split_2, "image": latents}} + pipe_3(**inputs_3).images[0] + + assert expected_steps_3 == done_steps[len(expected_steps_1) + len(expected_steps_2) :] + assert expected_steps == done_steps, ( + f"Failure with {scheduler_cls.__name__} and {num_steps} and {split_1} and {split_2}" + ) + + for steps in [7, 11, 20]: + for split_1, split_2 in zip([0.19, 0.32], [0.81, 0.68]): + for scheduler_cls in [ + DDIMScheduler, + EulerDiscreteScheduler, + DPMSolverMultistepScheduler, + UniPCMultistepScheduler, + HeunDiscreteScheduler, + ]: + assert_run_mixture(steps, split_1, split_2, scheduler_cls) + + def test_stable_diffusion_xl_multi_prompts(self): + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components).to(torch_device) + + # forward with single prompt + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = 5 + output = sd_pipe(**inputs) + image_slice_1 = 
output.images[0, -3:, -3:, -1] + + # forward with same prompt duplicated + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = 5 + inputs["prompt_2"] = inputs["prompt"] + output = sd_pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + # ensure the results are equal + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + # forward with different prompt + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = 5 + inputs["prompt_2"] = "different prompt" + output = sd_pipe(**inputs) + image_slice_3 = output.images[0, -3:, -3:, -1] + + # ensure the results are not equal + assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4 + + # manually set a negative_prompt + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = 5 + inputs["negative_prompt"] = "negative prompt" + output = sd_pipe(**inputs) + image_slice_1 = output.images[0, -3:, -3:, -1] + + # forward with same negative_prompt duplicated + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = 5 + inputs["negative_prompt"] = "negative prompt" + inputs["negative_prompt_2"] = inputs["negative_prompt"] + output = sd_pipe(**inputs) + image_slice_2 = output.images[0, -3:, -3:, -1] + + # ensure the results are equal + assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + + # forward with different negative_prompt + inputs = self.get_dummy_inputs(torch_device) + inputs["num_inference_steps"] = 5 + inputs["negative_prompt"] = "negative prompt" + inputs["negative_prompt_2"] = "different negative prompt" + output = sd_pipe(**inputs) + image_slice_3 = output.images[0, -3:, -3:, -1] + + # ensure the results are not equal + assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4 + + def test_stable_diffusion_xl_img2img_negative_conditions(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = sd_pipe(**inputs).images + image_slice_with_no_neg_conditions = image[0, -3:, -3:, -1] + + image = sd_pipe( + **inputs, + negative_original_size=(512, 512), + negative_crops_coords_top_left=( + 0, + 0, + ), + negative_target_size=(1024, 1024), + ).images + image_slice_with_neg_conditions = image[0, -3:, -3:, -1] + + assert ( + np.abs(image_slice_with_no_neg_conditions.flatten() - image_slice_with_neg_conditions.flatten()).max() + > 1e-4 + ) + + def test_stable_diffusion_xl_inpaint_mask_latents(self): + device = "cpu" + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components).to(device) + sd_pipe.set_progress_bar_config(disable=None) + + # normal mask + normal image + ## `image`: pil, `mask_image`: pil, `masked_image_latents`: None + inputs = self.get_dummy_inputs(device) + inputs["strength"] = 0.9 + out_0 = sd_pipe(**inputs).images + + # image latents + mask latents + inputs = self.get_dummy_inputs(device) + image = sd_pipe.image_processor.preprocess(inputs["image"]).to(sd_pipe.device) + mask = sd_pipe.mask_processor.preprocess(inputs["mask_image"]).to(sd_pipe.device) + masked_image = image * (mask < 0.5) + + generator = torch.Generator(device=device).manual_seed(0) + image_latents = sd_pipe._encode_vae_image(image, generator=generator) + torch.randn((1, 4, 32, 32), 
generator=generator) + mask_latents = sd_pipe._encode_vae_image(masked_image, generator=generator) + inputs["image"] = image_latents + inputs["masked_image_latents"] = mask_latents + inputs["mask_image"] = mask + inputs["strength"] = 0.9 + generator = torch.Generator(device=device).manual_seed(0) + torch.randn((1, 4, 32, 32), generator=generator) + inputs["generator"] = generator + out_1 = sd_pipe(**inputs).images + assert np.abs(out_0 - out_1).max() < 1e-2 + + def test_stable_diffusion_xl_inpaint_2_images(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = self.pipeline_class(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + # test to confirm if we pass two same image, we will get same output + inputs = self.get_dummy_inputs(device) + gen1 = torch.Generator(device=device).manual_seed(0) + gen2 = torch.Generator(device=device).manual_seed(0) + for name in ["prompt", "image", "mask_image"]: + inputs[name] = [inputs[name]] * 2 + inputs["generator"] = [gen1, gen2] + images = sd_pipe(**inputs).images + + assert images.shape == (2, 64, 64, 3) + + image_slice1 = images[0, -3:, -3:, -1] + image_slice2 = images[1, -3:, -3:, -1] + assert np.abs(image_slice1.flatten() - image_slice2.flatten()).max() < 1e-4 + + # test to confirm that if we pass two different images, we will get different output + inputs = self.get_dummy_inputs_2images(device) + images = sd_pipe(**inputs).images + assert images.shape == (2, 64, 64, 3) + + image_slice1 = images[0, -3:, -3:, -1] + image_slice2 = images[1, -3:, -3:, -1] + assert np.abs(image_slice1.flatten() - image_slice2.flatten()).max() > 1e-2 + + def test_pipeline_interrupt(self): + components = self.get_dummy_components() + sd_pipe = StableDiffusionXLInpaintPipeline(**components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + + prompt = "hey" + num_inference_steps = 5 + + # store intermediate latents from the generation process + class PipelineState: + def __init__(self): + self.state = [] + + def apply(self, pipe, i, t, callback_kwargs): + self.state.append(callback_kwargs["latents"]) + return callback_kwargs + + pipe_state = PipelineState() + sd_pipe( + prompt, + image=inputs["image"], + mask_image=inputs["mask_image"], + strength=0.8, + num_inference_steps=num_inference_steps, + output_type="np", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=pipe_state.apply, + ).images + + # interrupt generation at step index + interrupt_step_idx = 1 + + def callback_on_step_end(pipe, i, t, callback_kwargs): + if i == interrupt_step_idx: + pipe._interrupt = True + + return callback_kwargs + + output_interrupted = sd_pipe( + prompt, + image=inputs["image"], + mask_image=inputs["mask_image"], + strength=0.8, + num_inference_steps=num_inference_steps, + output_type="latent", + generator=torch.Generator("cpu").manual_seed(0), + callback_on_step_end=callback_on_step_end, + ).images + + # fetch intermediate latents at the interrupted step + # from the completed generation process + intermediate_latent = pipe_state.state[interrupt_step_idx] + + # compare the intermediate latent to the output of the interrupted process + # they should be the same + assert torch.allclose(intermediate_latent, output_interrupted, atol=1e-4) diff --git 
a/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py new file mode 100644 index 0000000000000000000000000000000000000000..20a03583e7a965fae46c9655313e0844edbc4b35 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py @@ -0,0 +1,189 @@ +# coding=utf-8 +# Copyright 2025 Harutatsu Akiyama and HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import unittest + +import numpy as np +import torch +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + EulerDiscreteScheduler, + UNet2DConditionModel, +) +from diffusers.image_processor import VaeImageProcessor +from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_instruct_pix2pix import ( + StableDiffusionXLInstructPix2PixPipeline, +) + +from ...testing_utils import enable_full_determinism, floats_tensor, torch_device +from ..pipeline_params import ( + IMAGE_TO_IMAGE_IMAGE_PARAMS, + TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS, + TEXT_GUIDED_IMAGE_VARIATION_PARAMS, +) +from ..test_pipelines_common import ( + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class StableDiffusionXLInstructPix2PixPipelineFastTests( + PipelineLatentTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineTesterMixin, + unittest.TestCase, +): + pipeline_class = StableDiffusionXLInstructPix2PixPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width", "cross_attention_kwargs"} + batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS + image_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + image_latents_params = IMAGE_TO_IMAGE_IMAGE_PARAMS + + def get_dummy_components(self): + torch.manual_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=8, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + projection_class_embeddings_input_dim=80, # 5 * 8 + 32 + cross_attention_dim=64, + ) + + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + steps_offset=1, + beta_schedule="scaled_linear", + timestep_spacing="leading", + ) + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + sample_size=128, + ) + torch.manual_seed(0) + text_encoder_config 
= CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + # SD2-specific config below + hidden_act="gelu", + projection_dim=32, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) + tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "text_encoder_2": text_encoder_2, + "tokenizer_2": tokenizer_2, + } + return components + + def get_dummy_inputs(self, device, seed=0): + image = floats_tensor((1, 3, 64, 64), rng=random.Random(seed)).to(device) + image = image / 2 + 0.5 + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "image_guidance_scale": 1, + "output_type": "np", + } + return inputs + + def test_components_function(self): + init_components = self.get_dummy_components() + pipe = self.pipeline_class(**init_components) + + self.assertTrue(hasattr(pipe, "components")) + self.assertTrue(set(pipe.components.keys()) == set(init_components.keys())) + + def test_inference_batch_single_identical(self): + super().test_inference_batch_single_identical(expected_max_diff=3e-3) + + def test_attention_slicing_forward_pass(self): + super().test_attention_slicing_forward_pass(expected_max_diff=2e-3) + + # Overwrite the default test_latents_input because pix2pix encodes the image differently + def test_latents_input(self): + components = self.get_dummy_components() + pipe = StableDiffusionXLInstructPix2PixPipeline(**components) + pipe.image_processor = VaeImageProcessor(do_resize=False, do_normalize=False) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + out = pipe(**self.get_dummy_inputs_by_type(torch_device, input_image_type="pt"))[0] + + vae = components["vae"] + inputs = self.get_dummy_inputs_by_type(torch_device, input_image_type="pt") + + for image_param in self.image_latents_params: + if image_param in inputs.keys(): + inputs[image_param] = vae.encode(inputs[image_param]).latent_dist.mode() + + out_latents_inputs = pipe(**inputs)[0] + + max_diff = np.abs(out - out_latents_inputs).max() + self.assertLess(max_diff, 1e-4, "passing latents as image input generates a different result from passing an image") + + @unittest.skip("Test not supported at the moment.") + def test_cfg(self): + pass + + @unittest.skip("Functionality is tested elsewhere.") + def test_save_load_optional_components(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_unclip/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_unclip/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_unclip/test_stable_unclip.py b/pythonProject/diffusers-main/tests/pipelines/stable_unclip/test_stable_unclip.py new file mode 100644 index 0000000000000000000000000000000000000000..8923c2f63ceee0c50198c4c1051de0c557150c1d --- /dev/null 
+++ b/pythonProject/diffusers-main/tests/pipelines/stable_unclip/test_stable_unclip.py @@ -0,0 +1,257 @@ +import gc +import unittest + +import torch +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + DDPMScheduler, + PriorTransformer, + StableUnCLIPPipeline, + UNet2DConditionModel, +) +from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer + +from ...testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + load_numpy, + nightly, + require_torch_accelerator, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import ( + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, + assert_mean_pixel_difference, +) + + +enable_full_determinism() + + +class StableUnCLIPPipelineFastTests( + PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableUnCLIPPipeline + params = TEXT_TO_IMAGE_PARAMS + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + + # TODO(will) Expected attn_bias.stride(1) == 0 to be true, but got false + test_xformers_attention = False + + def get_dummy_components(self): + embedder_hidden_size = 32 + embedder_projection_dim = embedder_hidden_size + + # prior components + + torch.manual_seed(0) + prior_tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + torch.manual_seed(0) + prior_text_encoder = CLIPTextModelWithProjection( + CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=embedder_hidden_size, + projection_dim=embedder_projection_dim, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + ) + + torch.manual_seed(0) + prior = PriorTransformer( + num_attention_heads=2, + attention_head_dim=12, + embedding_dim=embedder_projection_dim, + num_layers=1, + ) + + torch.manual_seed(0) + prior_scheduler = DDPMScheduler( + variance_type="fixed_small_log", + prediction_type="sample", + num_train_timesteps=1000, + clip_sample=True, + clip_sample_range=5.0, + beta_schedule="squaredcos_cap_v2", + ) + + # regular denoising components + + torch.manual_seed(0) + image_normalizer = StableUnCLIPImageNormalizer(embedding_dim=embedder_hidden_size) + image_noising_scheduler = DDPMScheduler(beta_schedule="squaredcos_cap_v2") + + torch.manual_seed(0) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + torch.manual_seed(0) + text_encoder = CLIPTextModel( + CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=embedder_hidden_size, + projection_dim=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + ) + + torch.manual_seed(0) + unet = UNet2DConditionModel( + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D"), + block_out_channels=(32, 64), + attention_head_dim=(2, 4), + class_embed_type="projection", + # The class embeddings are the noise augmented 
image embeddings. + # I.e. the image embeddings concatenated with the noised embeddings of the same dimension + projection_class_embeddings_input_dim=embedder_projection_dim * 2, + cross_attention_dim=embedder_hidden_size, + layers_per_block=1, + upcast_attention=True, + use_linear_projection=True, + ) + + torch.manual_seed(0) + scheduler = DDIMScheduler( + beta_schedule="scaled_linear", + beta_start=0.00085, + beta_end=0.012, + prediction_type="v_prediction", + set_alpha_to_one=False, + steps_offset=1, + ) + + torch.manual_seed(0) + vae = AutoencoderKL() + + components = { + # prior components + "prior_tokenizer": prior_tokenizer, + "prior_text_encoder": prior_text_encoder, + "prior": prior, + "prior_scheduler": prior_scheduler, + # image noising components + "image_normalizer": image_normalizer, + "image_noising_scheduler": image_noising_scheduler, + # regular denoising components + "tokenizer": tokenizer, + "text_encoder": text_encoder, + "unet": unet, + "scheduler": scheduler, + "vae": vae, + } + + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "prior_num_inference_steps": 2, + "output_type": "np", + } + return inputs + + # Overriding PipelineTesterMixin::test_attention_slicing_forward_pass + # because UnCLIP GPU non-determinism requires a looser check. + def test_attention_slicing_forward_pass(self): + test_max_difference = torch_device == "cpu" + + self._test_attention_slicing_forward_pass(test_max_difference=test_max_difference) + + # Overriding PipelineTesterMixin::test_inference_batch_single_identical + # because UnCLIP non-determinism requires a looser check. 
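+ # (the override below relaxes the max-diff tolerance to 1e-3)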
+ def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=1e-3) + + @unittest.skip("Test not supported because of the use of `_encode_prior_prompt()`.") + def test_encode_prompt_works_in_isolation(self): + pass + + +@nightly +@require_torch_accelerator +class StableUnCLIPPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_unclip(self): + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/stable_unclip_2_1_l_anime_turtle_fp16.npy" + ) + + pipe = StableUnCLIPPipeline.from_pretrained("fusing/stable-unclip-2-1-l", torch_dtype=torch.float16) + pipe.set_progress_bar_config(disable=None) + # stable unclip will oom when integration tests are run on a V100, + # so turn on memory savings + pipe.enable_attention_slicing() + pipe.enable_sequential_cpu_offload() + + generator = torch.Generator(device="cpu").manual_seed(0) + output = pipe("anime turtle", generator=generator, output_type="np") + + image = output.images[0] + + assert image.shape == (768, 768, 3) + + assert_mean_pixel_difference(image, expected_image) + + def test_stable_unclip_pipeline_with_sequential_cpu_offloading(self): + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipe = StableUnCLIPPipeline.from_pretrained("fusing/stable-unclip-2-1-l", torch_dtype=torch.float16) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + pipe.enable_sequential_cpu_offload() + + _ = pipe( + "anime turtle", + prior_num_inference_steps=2, + num_inference_steps=2, + output_type="np", + ) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 7 GB is allocated + assert mem_bytes < 7 * 10**9 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py b/pythonProject/diffusers-main/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..e7a0fbccef67bc7e06f20b8e6a4b8a1579d7c9dd --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py @@ -0,0 +1,313 @@ +import gc +import random +import unittest + +import numpy as np +import torch +from transformers import ( + CLIPImageProcessor, + CLIPTextConfig, + CLIPTextModel, + CLIPTokenizer, + CLIPVisionConfig, + CLIPVisionModelWithProjection, +) + +from diffusers import AutoencoderKL, DDIMScheduler, DDPMScheduler, StableUnCLIPImg2ImgPipeline, UNet2DConditionModel +from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer +from diffusers.utils.import_utils import is_xformers_available + +from ...testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + floats_tensor, + load_image, + load_numpy, + nightly, + require_torch_accelerator, + skip_mps, + torch_device, +) +from ..pipeline_params import TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS, TEXT_GUIDED_IMAGE_VARIATION_PARAMS +from 
..test_pipelines_common import ( + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, + assert_mean_pixel_difference, +) + + +enable_full_determinism() + + +class StableUnCLIPImg2ImgPipelineFastTests( + PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase +): + pipeline_class = StableUnCLIPImg2ImgPipeline + params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS + batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS + image_params = frozenset( + [] + ) # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess + image_latents_params = frozenset([]) + + supports_dduf = False + + def get_dummy_components(self): + embedder_hidden_size = 32 + embedder_projection_dim = embedder_hidden_size + + # image encoding components + + feature_extractor = CLIPImageProcessor(crop_size=32, size=32) + + torch.manual_seed(0) + image_encoder = CLIPVisionModelWithProjection( + CLIPVisionConfig( + hidden_size=embedder_hidden_size, + projection_dim=embedder_projection_dim, + num_hidden_layers=5, + num_attention_heads=4, + image_size=32, + intermediate_size=37, + patch_size=1, + ) + ) + + # regular denoising components + + torch.manual_seed(0) + image_normalizer = StableUnCLIPImageNormalizer(embedding_dim=embedder_hidden_size) + image_noising_scheduler = DDPMScheduler(beta_schedule="squaredcos_cap_v2") + + torch.manual_seed(0) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + torch.manual_seed(0) + text_encoder = CLIPTextModel( + CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=embedder_hidden_size, + projection_dim=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + ) + + torch.manual_seed(0) + unet = UNet2DConditionModel( + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D"), + block_out_channels=(32, 64), + attention_head_dim=(2, 4), + class_embed_type="projection", + # The class embeddings are the noise augmented image embeddings. + # I.e. 
the image embeddings concatenated with the noised embeddings of the same dimension + projection_class_embeddings_input_dim=embedder_projection_dim * 2, + cross_attention_dim=embedder_hidden_size, + layers_per_block=1, + upcast_attention=True, + use_linear_projection=True, + ) + + torch.manual_seed(0) + scheduler = DDIMScheduler( + beta_schedule="scaled_linear", + beta_start=0.00085, + beta_end=0.012, + prediction_type="v_prediction", + set_alpha_to_one=False, + steps_offset=1, + ) + + torch.manual_seed(0) + vae = AutoencoderKL() + + components = { + # image encoding components + "feature_extractor": feature_extractor, + "image_encoder": image_encoder.eval(), + # image noising components + "image_normalizer": image_normalizer.eval(), + "image_noising_scheduler": image_noising_scheduler, + # regular denoising components + "tokenizer": tokenizer, + "text_encoder": text_encoder.eval(), + "unet": unet.eval(), + "scheduler": scheduler, + "vae": vae.eval(), + } + + return components + + def get_dummy_inputs(self, device, seed=0, pil_image=True): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + input_image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + + if pil_image: + input_image = input_image * 0.5 + 0.5 + input_image = input_image.clamp(0, 1) + input_image = input_image.cpu().permute(0, 2, 3, 1).float().numpy() + input_image = DiffusionPipeline.numpy_to_pil(input_image)[0] + + return { + "prompt": "An anime racoon running a marathon", + "image": input_image, + "generator": generator, + "num_inference_steps": 2, + "output_type": "np", + } + + @skip_mps + def test_image_embeds_none(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + sd_pipe = StableUnCLIPImg2ImgPipeline(**components) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs.update({"image_embeds": None}) + image = sd_pipe(**inputs).images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.4397, 0.7080, 0.5590, 0.4255, 0.7181, 0.5938, 0.4051, 0.3720, 0.5116]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + # Overriding PipelineTesterMixin::test_attention_slicing_forward_pass + # because GPU non-determinism requires a looser check. + def test_attention_slicing_forward_pass(self): + test_max_difference = torch_device in ["cpu", "mps"] + + self._test_attention_slicing_forward_pass(test_max_difference=test_max_difference) + + # Overriding PipelineTesterMixin::test_inference_batch_single_identical + # because non-determinism requires a looser check. 
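+ # (again relaxed below to a 1e-3 max-diff tolerance)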
+ def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=1e-3) + + @unittest.skipIf( + torch_device != "cuda" or not is_xformers_available(), + reason="XFormers attention is only available with CUDA and `xformers` installed", + ) + def test_xformers_attention_forwardGenerator_pass(self): + self._test_xformers_attention_forwardGenerator_pass(test_max_difference=False) + + @unittest.skip("Test not supported at the moment.") + def test_encode_prompt_works_in_isolation(self): + pass + + +@nightly +@require_torch_accelerator +class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase): + def setUp(self): + # clean up the VRAM before each test + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_stable_unclip_l_img2img(self): + input_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/turtle.png" + ) + + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/stable_unclip_2_1_l_img2img_anime_turtle_fp16.npy" + ) + + pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( + "fusing/stable-unclip-2-1-l-img2img", torch_dtype=torch.float16 + ) + pipe.set_progress_bar_config(disable=None) + # stable unclip will oom when integration tests are run on a V100, + # so turn on memory savings + pipe.enable_attention_slicing() + pipe.enable_sequential_cpu_offload() + + generator = torch.Generator(device="cpu").manual_seed(0) + output = pipe(input_image, "anime turtle", generator=generator, output_type="np") + + image = output.images[0] + + assert image.shape == (768, 768, 3) + + assert_mean_pixel_difference(image, expected_image) + + def test_stable_unclip_h_img2img(self): + input_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/turtle.png" + ) + + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/stable_unclip_2_1_h_img2img_anime_turtle_fp16.npy" + ) + + pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( + "fusing/stable-unclip-2-1-h-img2img", torch_dtype=torch.float16 + ) + pipe.set_progress_bar_config(disable=None) + # stable unclip will oom when integration tests are run on a V100, + # so turn on memory savings + pipe.enable_attention_slicing() + pipe.enable_sequential_cpu_offload() + + generator = torch.Generator(device="cpu").manual_seed(0) + output = pipe(input_image, "anime turtle", generator=generator, output_type="np") + + image = output.images[0] + + assert image.shape == (768, 768, 3) + + assert_mean_pixel_difference(image, expected_image) + + def test_stable_unclip_img2img_pipeline_with_sequential_cpu_offloading(self): + input_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/turtle.png" + ) + + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) + + pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( + "fusing/stable-unclip-2-1-h-img2img", torch_dtype=torch.float16 + ) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + pipe.enable_sequential_cpu_offload() + + _ = pipe( + input_image, + "anime turtle", + 
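# two inference steps are enough here; this test only checks peak memory, not output quality +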
num_inference_steps=2, + output_type="np", + ) + + mem_bytes = backend_max_memory_allocated(torch_device) + # make sure that less than 7 GB is allocated + assert mem_bytes < 7 * 10**9 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_video_diffusion/__init__.py b/pythonProject/diffusers-main/tests/pipelines/stable_video_diffusion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/stable_video_diffusion/test_stable_video_diffusion.py b/pythonProject/diffusers-main/tests/pipelines/stable_video_diffusion/test_stable_video_diffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..52595f7a8cd931b98a5cfa44ba7516eef8618b9e --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/stable_video_diffusion/test_stable_video_diffusion.py @@ -0,0 +1,561 @@ +import gc +import random +import tempfile +import unittest + +import numpy as np +import torch +from transformers import ( + CLIPImageProcessor, + CLIPVisionConfig, + CLIPVisionModelWithProjection, +) + +import diffusers +from diffusers import ( + AutoencoderKLTemporalDecoder, + EulerDiscreteScheduler, + StableVideoDiffusionPipeline, + UNetSpatioTemporalConditionModel, +) +from diffusers.utils import load_image, logging +from diffusers.utils.import_utils import is_xformers_available + +from ...testing_utils import ( + CaptureLogger, + backend_empty_cache, + enable_full_determinism, + floats_tensor, + numpy_cosine_similarity_distance, + require_accelerate_version_greater, + require_accelerator, + require_torch_accelerator, + slow, + torch_device, +) +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +def to_np(tensor): + if isinstance(tensor, torch.Tensor): + tensor = tensor.detach().cpu().numpy() + + return tensor + + +class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = StableVideoDiffusionPipeline + params = frozenset(["image"]) + batch_params = frozenset(["image", "generator"]) + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + ] + ) + + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + unet = UNetSpatioTemporalConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=8, + out_channels=4, + down_block_types=( + "CrossAttnDownBlockSpatioTemporal", + "DownBlockSpatioTemporal", + ), + up_block_types=("UpBlockSpatioTemporal", "CrossAttnUpBlockSpatioTemporal"), + cross_attention_dim=32, + num_attention_heads=8, + projection_class_embeddings_input_dim=96, + addition_time_embed_dim=32, + ) + scheduler = EulerDiscreteScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + interpolation_type="linear", + num_train_timesteps=1000, + prediction_type="v_prediction", + sigma_max=700.0, + sigma_min=0.002, + steps_offset=1, + timestep_spacing="leading", + timestep_type="continuous", + trained_betas=None, + use_karras_sigmas=True, + ) + + torch.manual_seed(0) + vae = AutoencoderKLTemporalDecoder( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + latent_channels=4, + ) + + torch.manual_seed(0) + config = CLIPVisionConfig( + hidden_size=32, + projection_dim=32, + num_hidden_layers=5, + num_attention_heads=4, + image_size=32, + intermediate_size=37, + 
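# patch_size=1 below keeps the dummy CLIP vision tower valid for the tiny 32x32 test inputs +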
patch_size=1, + ) + image_encoder = CLIPVisionModelWithProjection(config) + + torch.manual_seed(0) + feature_extractor = CLIPImageProcessor(crop_size=32, size=32) + components = { + "unet": unet, + "image_encoder": image_encoder, + "scheduler": scheduler, + "vae": vae, + "feature_extractor": feature_extractor, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device="cpu").manual_seed(seed) + + image = floats_tensor((1, 3, 32, 32), rng=random.Random(0)).to(device) + inputs = { + "generator": generator, + "image": image, + "num_inference_steps": 2, + "output_type": "pt", + "min_guidance_scale": 1.0, + "max_guidance_scale": 2.5, + "num_frames": 2, + "height": 32, + "width": 32, + } + return inputs + + @unittest.skip("Deprecated functionality") + def test_attention_slicing_forward_pass(self): + pass + + @unittest.skip("Batched inference works and outputs look correct, but the test is failing") + def test_inference_batch_single_identical( + self, + batch_size=2, + expected_max_diff=1e-4, + ): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + + pipe.set_progress_bar_config(disable=None) + inputs = self.get_dummy_inputs(torch_device) + + # Reset the generator in case it has been used in self.get_dummy_inputs + inputs["generator"] = torch.Generator("cpu").manual_seed(0) + + logger = logging.get_logger(pipe.__module__) + logger.setLevel(level=diffusers.logging.FATAL) + + # batchify inputs + batched_inputs = {} + batched_inputs.update(inputs) + + batched_inputs["generator"] = [torch.Generator("cpu").manual_seed(0) for i in range(batch_size)] + batched_inputs["image"] = torch.cat([inputs["image"]] * batch_size, dim=0) + + output = pipe(**inputs).frames + output_batch = pipe(**batched_inputs).frames + + assert len(output_batch) == batch_size + + max_diff = np.abs(to_np(output_batch[0]) - to_np(output[0])).max() + assert max_diff < expected_max_diff + + @unittest.skip("Test is similar to test_inference_batch_single_identical") + def test_inference_batch_consistent(self): + pass + + def test_np_output_type(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + inputs["output_type"] = "np" + output = pipe(**inputs).frames + self.assertTrue(isinstance(output, np.ndarray)) + self.assertEqual(len(output.shape), 5) + + def test_dict_tuple_outputs_equivalent(self, expected_max_difference=1e-4): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + output = pipe(**self.get_dummy_inputs(generator_device)).frames[0] + output_tuple = pipe(**self.get_dummy_inputs(generator_device), return_dict=False)[0] + + max_diff = np.abs(to_np(output) - to_np(output_tuple)).max() + 
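# return_dict=False should only change the output container, not the tensors themselves +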
self.assertLess(max_diff, expected_max_difference) + + @unittest.skip("Test is currently failing") + def test_float16_inference(self, expected_max_diff=5e-2): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + components = self.get_dummy_components() + pipe_fp16 = self.pipeline_class(**components) + for component in pipe_fp16.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + pipe_fp16.to(torch_device, torch.float16) + pipe_fp16.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs).frames[0] + + fp16_inputs = self.get_dummy_inputs(torch_device) + output_fp16 = pipe_fp16(**fp16_inputs).frames[0] + + max_diff = np.abs(to_np(output) - to_np(output_fp16)).max() + self.assertLess(max_diff, expected_max_diff, "The outputs of the fp16 and fp32 pipelines are too different.") + + @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") + @require_accelerator + def test_save_load_float16(self, expected_max_diff=1e-2): + components = self.get_dummy_components() + for name, module in components.items(): + if hasattr(module, "half"): + components[name] = module.to(torch_device).half() + + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs).frames[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, torch_dtype=torch.float16) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for name, component in pipe_loaded.components.items(): + if hasattr(component, "dtype"): + self.assertTrue( + component.dtype == torch.float16, + f"`{name}.dtype` switched from `float16` to {component.dtype} after loading.", + ) + + inputs = self.get_dummy_inputs(torch_device) + output_loaded = pipe_loaded(**inputs).frames[0] + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess( + max_diff, expected_max_diff, "The output of the fp16 pipeline changed after saving and loading." 
+ ) + + def test_save_load_optional_components(self, expected_max_difference=1e-4): + if not hasattr(self.pipeline_class, "_optional_components"): + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + # set all optional components to None + for optional_component in pipe._optional_components: + setattr(pipe, optional_component, None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + output = pipe(**inputs).frames[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for optional_component in pipe._optional_components: + self.assertTrue( + getattr(pipe_loaded, optional_component) is None, + f"`{optional_component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + output_loaded = pipe_loaded(**inputs).frames[0] + + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess(max_diff, expected_max_difference) + + def test_save_load_local(self, expected_max_difference=9e-4): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs).frames[0] + + logger = logging.get_logger("diffusers.pipelines.pipeline_utils") + logger.setLevel(diffusers.logging.INFO) + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + + with CaptureLogger(logger) as cap_logger: + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + + for name in pipe_loaded.components.keys(): + if name not in pipe_loaded._optional_components: + assert name in str(cap_logger) + + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output_loaded = pipe_loaded(**inputs).frames[0] + + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess(max_diff, expected_max_difference) + + @require_accelerator + def test_to_device(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.set_progress_bar_config(disable=None) + + pipe.to("cpu") + model_devices = [ + component.device.type for component in pipe.components.values() if hasattr(component, "device") + ] + self.assertTrue(all(device == "cpu" for device in model_devices)) + + output_cpu = pipe(**self.get_dummy_inputs("cpu")).frames[0] + self.assertTrue(np.isnan(output_cpu).sum() == 0) + + pipe.to(torch_device) + model_devices = [ + component.device.type for component in pipe.components.values() if hasattr(component, "device") + ] + self.assertTrue(all(device == torch_device for device in model_devices)) + + output_device = pipe(**self.get_dummy_inputs(torch_device)).frames[0] + 
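# the accelerator pass should likewise produce a NaN-free output +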
self.assertTrue(np.isnan(to_np(output_device)).sum() == 0) + + def test_to_dtype(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.set_progress_bar_config(disable=None) + + model_dtypes = [component.dtype for component in pipe.components.values() if hasattr(component, "dtype")] + self.assertTrue(all(dtype == torch.float32 for dtype in model_dtypes)) + + pipe.to(dtype=torch.float16) + model_dtypes = [component.dtype for component in pipe.components.values() if hasattr(component, "dtype")] + self.assertTrue(all(dtype == torch.float16 for dtype in model_dtypes)) + + @require_accelerator + @require_accelerate_version_greater("0.14.0") + def test_sequential_cpu_offload_forward_pass(self, expected_max_diff=1e-4): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + output_without_offload = pipe(**inputs).frames[0] + + pipe.enable_sequential_cpu_offload(device=torch_device) + + inputs = self.get_dummy_inputs(generator_device) + output_with_offload = pipe(**inputs).frames[0] + + max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max() + self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results") + + @require_accelerator + @require_accelerate_version_greater("0.17.0") + def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4): + generator_device = "cpu" + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(generator_device) + output_without_offload = pipe(**inputs).frames[0] + + pipe.enable_model_cpu_offload(device=torch_device) + inputs = self.get_dummy_inputs(generator_device) + output_with_offload = pipe(**inputs).frames[0] + + max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max() + self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results") + offloaded_modules = [ + v + for k, v in pipe.components.items() + if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload + ] + self.assertTrue( + all(v.device.type == "cpu" for v in offloaded_modules), + f"Not offloaded: {[v for v in offloaded_modules if v.device.type != 'cpu']}", + ) + + @unittest.skipIf( + torch_device != "cuda" or not is_xformers_available(), + reason="XFormers attention is only available with CUDA and `xformers` installed", + ) + def test_xformers_attention_forwardGenerator_pass(self): + expected_max_diff = 9e-4 + + if not self.test_xformers_attention: + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output_without_offload = pipe(**inputs).frames[0] + output_without_offload = ( 
+            output_without_offload.cpu() if torch.is_tensor(output_without_offload) else output_without_offload
+        )
+
+        pipe.enable_xformers_memory_efficient_attention()
+        inputs = self.get_dummy_inputs(torch_device)
+        output_with_offload = pipe(**inputs).frames[0]
+        output_with_offload = (
+            output_with_offload.cpu() if torch.is_tensor(output_with_offload) else output_with_offload
+        )
+
+        max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
+        self.assertLess(max_diff, expected_max_diff, "XFormers attention should not affect the inference results")
+
+    def test_disable_cfg(self):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+
+        pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        generator_device = "cpu"
+        inputs = self.get_dummy_inputs(generator_device)
+        inputs["max_guidance_scale"] = 1.0
+        output = pipe(**inputs).frames
+        self.assertEqual(len(output.shape), 5)
+
+
+@slow
+@require_torch_accelerator
+class StableVideoDiffusionPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        # clean up the VRAM after each test
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_sd_video(self):
+        pipe = StableVideoDiffusionPipeline.from_pretrained(
+            "stabilityai/stable-video-diffusion-img2vid",
+            variant="fp16",
+            torch_dtype=torch.float16,
+        )
+        pipe.enable_model_cpu_offload(device=torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        image = load_image(
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
+        )
+
+        generator = torch.Generator("cpu").manual_seed(0)
+        num_frames = 3
+
+        output = pipe(
+            image=image,
+            num_frames=num_frames,
+            generator=generator,
+            num_inference_steps=3,
+            output_type="np",
+        )
+
+        image = output.frames[0]
+        assert image.shape == (num_frames, 576, 1024, 3)
+
+        image_slice = image[0, -3:, -3:, -1]
+        expected_slice = np.array([0.8592, 0.8645, 0.8499, 0.8722, 0.8769, 0.8421, 0.8557, 0.8528, 0.8285])
+        assert numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice.flatten()) < 1e-3
diff --git a/pythonProject/diffusers-main/tests/pipelines/visualcloze/__init__.py b/pythonProject/diffusers-main/tests/pipelines/visualcloze/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pythonProject/diffusers-main/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py b/pythonProject/diffusers-main/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py
new file mode 100644
index 0000000000000000000000000000000000000000..00ae0441fe9908b58a97637ee7fae509f7f93216
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py
@@ -0,0 +1,344 @@
+import random
+import tempfile
+import unittest
+
+import numpy as np
+import torch
+from PIL import Image
+from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
+
+import diffusers
+from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel, VisualClozePipeline
+from diffusers.utils import logging
+
+from ...testing_utils import (
+    CaptureLogger,
+ enable_full_determinism, + floats_tensor, + require_accelerator, + torch_device, +) +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class VisualClozePipelineFastTests(unittest.TestCase, PipelineTesterMixin): + pipeline_class = VisualClozePipeline + params = frozenset( + [ + "task_prompt", + "content_prompt", + "upsampling_height", + "upsampling_width", + "guidance_scale", + "prompt_embeds", + "pooled_prompt_embeds", + "upsampling_strength", + ] + ) + batch_params = frozenset(["task_prompt", "content_prompt", "image"]) + test_xformers_attention = False + test_layerwise_casting = True + test_group_offloading = True + + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = FluxTransformer2DModel( + patch_size=1, + in_channels=12, + out_channels=4, + num_layers=1, + num_single_layers=1, + attention_head_dim=6, + num_attention_heads=2, + joint_attention_dim=32, + pooled_projection_dim=32, + axes_dims_rope=[2, 2, 2], + ) + clip_text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + hidden_act="gelu", + projection_dim=32, + ) + + torch.manual_seed(0) + text_encoder = CLIPTextModel(clip_text_encoder_config) + + torch.manual_seed(0) + text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + vae = AutoencoderKL( + sample_size=32, + in_channels=3, + out_channels=3, + block_out_channels=(4,), + layers_per_block=1, + latent_channels=1, + norm_num_groups=1, + use_quant_conv=False, + use_post_quant_conv=False, + shift_factor=0.0609, + scaling_factor=1.5035, + ) + + scheduler = FlowMatchEulerDiscreteScheduler() + + return { + "scheduler": scheduler, + "text_encoder": text_encoder, + "text_encoder_2": text_encoder_2, + "tokenizer": tokenizer, + "tokenizer_2": tokenizer_2, + "transformer": transformer, + "vae": vae, + "resolution": 32, + } + + def get_dummy_inputs(self, device, seed=0): + # Create example images to simulate the input format required by VisualCloze + context_image = [ + Image.fromarray(floats_tensor((32, 32, 3), rng=random.Random(seed), scale=255).numpy().astype(np.uint8)) + for _ in range(2) + ] + query_image = [ + Image.fromarray( + floats_tensor((32, 32, 3), rng=random.Random(seed + 1), scale=255).numpy().astype(np.uint8) + ), + None, + ] + + # Create an image list that conforms to the VisualCloze input format + image = [ + context_image, # In-Context example + query_image, # Query image + ] + + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device="cpu").manual_seed(seed) + + inputs = { + "task_prompt": "Each row outlines a logical process, starting from [IMAGE1] gray-based depth map with detailed object contours, to achieve [IMAGE2] an image with flawless clarity.", + "content_prompt": "A beautiful landscape with mountains and a lake", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "upsampling_height": 32, + "upsampling_width": 32, + "max_sequence_length": 77, + "output_type": "np", + "upsampling_strength": 0.4, + } + return inputs + + def test_visualcloze_different_prompts(self): + pipe 
= self.pipeline_class(**self.get_dummy_components()).to(torch_device)
+
+        inputs = self.get_dummy_inputs(torch_device)
+        output_same_prompt = pipe(**inputs).images[0]
+
+        inputs = self.get_dummy_inputs(torch_device)
+        inputs["task_prompt"] = "A different task to perform."
+        output_different_prompts = pipe(**inputs).images[0]
+
+        max_diff = np.abs(output_same_prompt - output_different_prompts).max()
+
+        # Outputs should be different
+        assert max_diff > 1e-6
+
+    def test_visualcloze_image_output_shape(self):
+        pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
+        inputs = self.get_dummy_inputs(torch_device)
+
+        height_width_pairs = [(32, 32), (72, 57)]
+        for height, width in height_width_pairs:
+            # Output dimensions are rounded down to a multiple of twice the VAE scale factor
+            expected_height = height - height % (pipe.generation_pipe.vae_scale_factor * 2)
+            expected_width = width - width % (pipe.generation_pipe.vae_scale_factor * 2)
+
+            inputs.update({"upsampling_height": height, "upsampling_width": width})
+            image = pipe(**inputs).images[0]
+            output_height, output_width, _ = image.shape
+            assert (output_height, output_width) == (expected_height, expected_width)
+
+    def test_inference_batch_single_identical(self):
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)
+
+    def test_upsampling_strength(self, expected_min_diff=1e-1):
+        pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
+        inputs = self.get_dummy_inputs(torch_device)
+
+        # Compare a low and a high upsampling strength
+        inputs["upsampling_strength"] = 0.2
+        output_low_strength = pipe(**inputs).images[0]
+
+        inputs["upsampling_strength"] = 0.8
+        output_high_strength = pipe(**inputs).images[0]
+
+        # Different upsampling strengths should produce different outputs
+        max_diff = np.abs(output_low_strength - output_high_strength).max()
+        assert max_diff > expected_min_diff
+
+    def test_different_task_prompts(self, expected_min_diff=1e-1):
+        pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
+        inputs = self.get_dummy_inputs(torch_device)
+
+        output_original = pipe(**inputs).images[0]
+
+        inputs["task_prompt"] = "A different task description for image generation"
+        output_different_task = pipe(**inputs).images[0]
+
+        # Different task prompts should produce different outputs
+        max_diff = np.abs(output_original - output_different_task).max()
+        assert max_diff > expected_min_diff
+
+    @unittest.skip(
+        "Test not applicable because the pipeline being tested is a wrapper pipeline. CFG tests should be done on the inner pipelines."
+ ) + def test_callback_cfg(self): + pass + + def test_save_load_local(self, expected_max_difference=5e-4): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0] + + logger = logging.get_logger("diffusers.pipelines.pipeline_utils") + logger.setLevel(diffusers.logging.INFO) + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + + with CaptureLogger(logger) as cap_logger: + # NOTE: Resolution must be set to 32 for loading otherwise will lead to OOM on CI hardware + # This attribute is not serialized in the config of the pipeline + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, resolution=32) + + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + for name in pipe_loaded.components.keys(): + if name not in pipe_loaded._optional_components: + assert name in str(cap_logger) + + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess(max_diff, expected_max_difference) + + def test_save_load_optional_components(self, expected_max_difference=1e-4): + if not hasattr(self.pipeline_class, "_optional_components"): + return + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + # set all optional components to None + for optional_component in pipe._optional_components: + setattr(pipe, optional_component, None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + # NOTE: Resolution must be set to 32 for loading otherwise will lead to OOM on CI hardware + # This attribute is not serialized in the config of the pipeline + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, resolution=32) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for optional_component in pipe._optional_components: + self.assertTrue( + getattr(pipe_loaded, optional_component) is None, + f"`{optional_component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess(max_diff, expected_max_difference) + + @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") + @require_accelerator + def test_save_load_float16(self, expected_max_diff=1e-2): + components = self.get_dummy_components() + for name, module in 
components.items(): + if hasattr(module, "half"): + components[name] = module.to(torch_device).half() + + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir) + # NOTE: Resolution must be set to 32 for loading otherwise will lead to OOM on CI hardware + # This attribute is not serialized in the config of the pipeline + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, torch_dtype=torch.float16, resolution=32) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for name, component in pipe_loaded.components.items(): + if hasattr(component, "dtype"): + self.assertTrue( + component.dtype == torch.float16, + f"`{name}.dtype` switched from `float16` to {component.dtype} after loading.", + ) + + inputs = self.get_dummy_inputs(torch_device) + output_loaded = pipe_loaded(**inputs)[0] + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess( + max_diff, expected_max_diff, "The output of the fp16 pipeline changed after saving and loading." + ) diff --git a/pythonProject/diffusers-main/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py b/pythonProject/diffusers-main/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..ab6b3ca5c587670bd21d8be65c0648632d385fb5 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py @@ -0,0 +1,312 @@ +import random +import tempfile +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel + +import diffusers +from diffusers import ( + AutoencoderKL, + FlowMatchEulerDiscreteScheduler, + FluxTransformer2DModel, + VisualClozeGenerationPipeline, +) +from diffusers.utils import logging + +from ...testing_utils import ( + CaptureLogger, + enable_full_determinism, + floats_tensor, + require_accelerator, + torch_device, +) +from ..test_pipelines_common import PipelineTesterMixin, to_np + + +enable_full_determinism() + + +class VisualClozeGenerationPipelineFastTests(unittest.TestCase, PipelineTesterMixin): + pipeline_class = VisualClozeGenerationPipeline + params = frozenset( + [ + "task_prompt", + "content_prompt", + "guidance_scale", + "prompt_embeds", + "pooled_prompt_embeds", + ] + ) + batch_params = frozenset(["task_prompt", "content_prompt", "image"]) + test_xformers_attention = False + test_layerwise_casting = True + test_group_offloading = True + + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + transformer = FluxTransformer2DModel( + patch_size=1, + in_channels=12, + out_channels=4, + num_layers=1, + num_single_layers=1, + attention_head_dim=6, + num_attention_heads=2, + joint_attention_dim=32, + pooled_projection_dim=32, + axes_dims_rope=[2, 2, 2], + ) + clip_text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, 
+ num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + hidden_act="gelu", + projection_dim=32, + ) + + torch.manual_seed(0) + text_encoder = CLIPTextModel(clip_text_encoder_config) + + torch.manual_seed(0) + text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + tokenizer_2 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + vae = AutoencoderKL( + sample_size=32, + in_channels=3, + out_channels=3, + block_out_channels=(4,), + layers_per_block=1, + latent_channels=1, + norm_num_groups=1, + use_quant_conv=False, + use_post_quant_conv=False, + shift_factor=0.0609, + scaling_factor=1.5035, + ) + + scheduler = FlowMatchEulerDiscreteScheduler() + + return { + "scheduler": scheduler, + "text_encoder": text_encoder, + "text_encoder_2": text_encoder_2, + "tokenizer": tokenizer, + "tokenizer_2": tokenizer_2, + "transformer": transformer, + "vae": vae, + "resolution": 32, + } + + def get_dummy_inputs(self, device, seed=0): + # Create example images to simulate the input format required by VisualCloze + context_image = [ + Image.fromarray(floats_tensor((32, 32, 3), rng=random.Random(seed), scale=255).numpy().astype(np.uint8)) + for _ in range(2) + ] + query_image = [ + Image.fromarray( + floats_tensor((32, 32, 3), rng=random.Random(seed + 1), scale=255).numpy().astype(np.uint8) + ), + None, + ] + + # Create an image list that conforms to the VisualCloze input format + image = [ + context_image, # In-Context example + query_image, # Query image + ] + + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device="cpu").manual_seed(seed) + + inputs = { + "task_prompt": "Each row outlines a logical process, starting from [IMAGE1] gray-based depth map with detailed object contours, to achieve [IMAGE2] an image with flawless clarity.", + "content_prompt": "A beautiful landscape with mountains and a lake", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "max_sequence_length": 77, + "output_type": "np", + } + return inputs + + def test_visualcloze_different_prompts(self): + pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) + + inputs = self.get_dummy_inputs(torch_device) + output_same_prompt = pipe(**inputs).images[0] + + inputs = self.get_dummy_inputs(torch_device) + inputs["task_prompt"] = "A different task to perform." 
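+ # Regenerate with the edited task prompt; if the prompt were ignored, the outputs below would match.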
+ output_different_prompts = pipe(**inputs).images[0] + + max_diff = np.abs(output_same_prompt - output_different_prompts).max() + + # Outputs should be different + assert max_diff > 1e-6 + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=1e-3) + + def test_different_task_prompts(self, expected_min_diff=1e-1): + pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) + inputs = self.get_dummy_inputs(torch_device) + + output_original = pipe(**inputs).images[0] + + inputs["task_prompt"] = "A different task description for image generation" + output_different_task = pipe(**inputs).images[0] + + # Different task prompts should produce different outputs + max_diff = np.abs(output_original - output_different_task).max() + assert max_diff > expected_min_diff + + def test_save_load_local(self, expected_max_difference=5e-4): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0] + + logger = logging.get_logger("diffusers.pipelines.pipeline_utils") + logger.setLevel(diffusers.logging.INFO) + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + + with CaptureLogger(logger) as cap_logger: + # NOTE: Resolution must be set to 32 for loading otherwise will lead to OOM on CI hardware + # This attribute is not serialized in the config of the pipeline + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, resolution=32) + + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + + for name in pipe_loaded.components.keys(): + if name not in pipe_loaded._optional_components: + assert name in str(cap_logger) + + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess(max_diff, expected_max_difference) + + def test_save_load_optional_components(self, expected_max_difference=1e-4): + if not hasattr(self.pipeline_class, "_optional_components"): + return + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + # set all optional components to None + for optional_component in pipe._optional_components: + setattr(pipe, optional_component, None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + # NOTE: Resolution must be set to 32 for loading otherwise will lead to OOM on CI hardware + # This attribute is not serialized in the config of the pipeline + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, resolution=32) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + 
component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for optional_component in pipe._optional_components: + self.assertTrue( + getattr(pipe_loaded, optional_component) is None, + f"`{optional_component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess(max_diff, expected_max_difference) + + @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") + @require_accelerator + def test_save_load_float16(self, expected_max_diff=1e-2): + components = self.get_dummy_components() + for name, module in components.items(): + if hasattr(module, "half"): + components[name] = module.to(torch_device).half() + + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir) + # NOTE: Resolution must be set to 32 for loading otherwise will lead to OOM on CI hardware + # This attribute is not serialized in the config of the pipeline + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, torch_dtype=torch.float16, resolution=32) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for name, component in pipe_loaded.components.items(): + if hasattr(component, "dtype"): + self.assertTrue( + component.dtype == torch.float16, + f"`{name}.dtype` switched from `float16` to {component.dtype} after loading.", + ) + + inputs = self.get_dummy_inputs(torch_device) + output_loaded = pipe_loaded(**inputs)[0] + max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() + self.assertLess( + max_diff, expected_max_diff, "The output of the fp16 pipeline changed after saving and loading." + ) + + @unittest.skip("Skipped due to missing layout_prompt. Needs further investigation.") + def test_encode_prompt_works_in_isolation(self, extra_required_param_value_dict=None, atol=0.0001, rtol=0.0001): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/wan/__init__.py b/pythonProject/diffusers-main/tests/pipelines/wan/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/pipelines/wan/test_wan.py b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan.py new file mode 100644 index 0000000000000000000000000000000000000000..106a7b294646767a193c5563507385c1487b5ede --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan.py @@ -0,0 +1,201 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import tempfile +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanPipeline, WanTransformer3DModel + +from ...testing_utils import ( + backend_empty_cache, + enable_full_determinism, + require_torch_accelerator, + slow, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class WanPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = WanPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + # TODO: impl FlowDPMSolverMultistepScheduler + scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "transformer_2": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "height": 16, + "width": 16, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + + # fmt: off + expected_slice = torch.tensor([0.4525, 0.452, 0.4485, 0.4534, 0.4524, 0.4529, 0.454, 0.453, 0.5127, 0.5326, 0.5204, 0.5253, 0.5439, 
0.5424, 0.5133, 0.5078]) + # fmt: on + + generated_slice = generated_video.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3)) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + # _optional_components include transformer, transformer_2, but only transformer_2 is optional for this wan2.1 t2v pipeline + def test_save_load_optional_components(self, expected_max_difference=1e-4): + optional_component = "transformer_2" + + components = self.get_dummy_components() + components[optional_component] = None + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + self.assertTrue( + getattr(pipe_loaded, optional_component) is None, + f"`{optional_component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(output.detach().cpu().numpy() - output_loaded.detach().cpu().numpy()).max() + self.assertLess(max_diff, expected_max_difference) + + +@slow +@require_torch_accelerator +class WanPipelineIntegrationTests(unittest.TestCase): + prompt = "A painting of a squirrel eating a burger." + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + @unittest.skip("TODO: test needs to be implemented") + def test_Wanx(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_22.py b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_22.py new file mode 100644 index 0000000000000000000000000000000000000000..56ef5ceb97edfc09dd2825c3f78c1a7fba02bffe --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_22.py @@ -0,0 +1,367 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
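+ +# The Wan 2.2 pipeline routes denoising between `transformer` and `transformer_2` based on `boundary_ratio`; +# with a ratio of 1.0 only `transformer_2` runs, which the optional-component test below relies on.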
+ +import tempfile +import unittest + +import numpy as np +import torch +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanPipeline, WanTransformer3DModel + +from ...testing_utils import ( + enable_full_determinism, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class Wan22PipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = WanPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + torch.manual_seed(0) + transformer_2 = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "transformer_2": transformer_2, + "boundary_ratio": 0.875, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "dance monkey", + "negative_prompt": "negative", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "height": 16, + "width": 16, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class( + **components, + ) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + + # fmt: off + expected_slice = torch.tensor([0.4525, 0.452, 0.4485, 0.4534, 0.4524, 0.4529, 0.454, 0.453, 0.5127, 0.5326, 0.5204, 0.5253, 0.5439, 0.5424, 0.5133, 0.5078]) + # fmt: on + + generated_slice = generated_video.flatten() + generated_slice = 
torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3)) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + def test_save_load_optional_components(self, expected_max_difference=1e-4): + optional_component = "transformer" + + components = self.get_dummy_components() + components[optional_component] = None + components["boundary_ratio"] = 1.0 # for wan 2.2 14B, transformer is not used when boundary_ratio is 1.0 + + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + self.assertTrue( + getattr(pipe_loaded, "transformer") is None, + "`transformer` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(output.detach().cpu().numpy() - output_loaded.detach().cpu().numpy()).max() + self.assertLess(max_diff, expected_max_difference) + + +class Wan225BPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = WanPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=48, + in_channels=12, + out_channels=12, + is_residual=True, + patch_size=2, + latents_mean=[0.0] * 48, + latents_std=[1.0] * 48, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + scale_factor_spatial=16, + scale_factor_temporal=4, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=48, + out_channels=48, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "transformer_2": None, + "boundary_ratio": None, + "expand_timesteps": True, + } + return components + + def 
get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "height": 32, + "width": 32, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class( + **components, + ) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + self.assertEqual(generated_video.shape, (9, 3, 32, 32)) + + # fmt: off + expected_slice = torch.tensor([[[0.4814, 0.4298, 0.5094, 0.4289, 0.5061, 0.4301, 0.5043, 0.4284, 0.5375, + 0.5965, 0.5527, 0.6014, 0.5228, 0.6076, 0.6644, 0.5651]]]) + # fmt: on + + generated_slice = generated_video.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue( + torch.allclose(generated_slice, expected_slice, atol=1e-3), + f"generated_slice: {generated_slice}, expected_slice: {expected_slice}", + ) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + def test_components_function(self): + init_components = self.get_dummy_components() + init_components.pop("boundary_ratio") + init_components.pop("expand_timesteps") + pipe = self.pipeline_class(**init_components) + + self.assertTrue(hasattr(pipe, "components")) + self.assertTrue(set(pipe.components.keys()) == set(init_components.keys())) + + def test_save_load_optional_components(self, expected_max_difference=1e-4): + optional_component = "transformer_2" + + components = self.get_dummy_components() + components[optional_component] = None + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + self.assertTrue( + getattr(pipe_loaded, optional_component) is None, + f"`{optional_component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(output.detach().cpu().numpy() - output_loaded.detach().cpu().numpy()).max() + self.assertLess(max_diff, expected_max_difference) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=2e-3) diff --git a/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_22_image_to_video.py b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_22_image_to_video.py new file mode 100644 index 
0000000000000000000000000000000000000000..6294d62044f3caa067647240b0fd9a5f6266c8b3 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_22_image_to_video.py @@ -0,0 +1,392 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanImageToVideoPipeline, WanTransformer3DModel + +from ...testing_utils import ( + enable_full_determinism, + torch_device, +) +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class Wan22ImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = WanImageToVideoPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=36, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + torch.manual_seed(0) + transformer_2 = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=36, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "transformer_2": transformer_2, + "image_encoder": None, + "image_processor": None, + "boundary_ratio": 0.875, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + 
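# A tiny solid-color PIL image is enough to exercise the image-conditioning path in a fast test. +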
image_height = 16 + image_width = 16 + image = Image.new("RGB", (image_width, image_height)) + inputs = { + "image": image, + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "height": image_height, + "width": image_width, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class( + **components, + ) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + + # fmt: off + expected_slice = torch.tensor([0.4527, 0.4526, 0.4498, 0.4539, 0.4521, 0.4524, 0.4533, 0.4535, 0.5154, + 0.5353, 0.5200, 0.5174, 0.5434, 0.5301, 0.5199, 0.5216]) + # fmt: on + + generated_slice = generated_video.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue( + torch.allclose(generated_slice, expected_slice, atol=1e-3), + f"generated_slice: {generated_slice}, expected_slice: {expected_slice}", + ) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + def test_save_load_optional_components(self, expected_max_difference=1e-4): + optional_component = ["transformer", "image_encoder", "image_processor"] + + components = self.get_dummy_components() + for component in optional_component: + components[component] = None + components["boundary_ratio"] = 1.0 # for wan 2.2 14B, transformer is not used when boundary_ratio is 1.0 + + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for component in optional_component: + self.assertTrue( + getattr(pipe_loaded, component) is None, + f"`{component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(output.detach().cpu().numpy() - output_loaded.detach().cpu().numpy()).max() + self.assertLess(max_diff, expected_max_difference) + + +class Wan225BImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = WanImageToVideoPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def 
get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=48, + in_channels=12, + out_channels=12, + is_residual=True, + patch_size=2, + latents_mean=[0.0] * 48, + latents_std=[1.0] * 48, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + scale_factor_spatial=16, + scale_factor_temporal=4, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(prediction_type="flow_prediction", use_flow_sigmas=True, flow_shift=3.0) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=48, + out_channels=48, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "transformer_2": None, + "image_encoder": None, + "image_processor": None, + "boundary_ratio": None, + "expand_timesteps": True, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + image_height = 32 + image_width = 32 + image = Image.new("RGB", (image_width, image_height)) + inputs = { + "image": image, + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "height": image_height, + "width": image_width, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class( + **components, + ) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + self.assertEqual(generated_video.shape, (9, 3, 32, 32)) + + # fmt: off + expected_slice = torch.tensor([[0.4833, 0.4305, 0.5100, 0.4299, 0.5056, 0.4298, 0.5052, 0.4332, 0.5550, + 0.6092, 0.5536, 0.5928, 0.5199, 0.5864, 0.6705, 0.5493]]) + # fmt: on + + generated_slice = generated_video.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue( + torch.allclose(generated_slice, expected_slice, atol=1e-3), + f"generated_slice: {generated_slice}, expected_slice: {expected_slice}", + ) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + def test_components_function(self): + init_components = self.get_dummy_components() + init_components.pop("boundary_ratio") + init_components.pop("expand_timesteps") + pipe = self.pipeline_class(**init_components) + + self.assertTrue(hasattr(pipe, "components")) + self.assertTrue(set(pipe.components.keys()) == set(init_components.keys())) + + def test_save_load_optional_components(self, expected_max_difference=1e-4): + optional_component = ["transformer_2", "image_encoder", "image_processor"] + + components = self.get_dummy_components() + for component in optional_component: + components[component] = None + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): 
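+ # Reset any custom attention processors so outputs before and after reload are comparable.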
+ if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + for component in optional_component: + self.assertTrue( + getattr(pipe_loaded, component) is None, + f"`{component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(output.detach().cpu().numpy() - output_loaded.detach().cpu().numpy()).max() + self.assertLess(max_diff, expected_max_difference) + + def test_inference_batch_single_identical(self): + self._test_inference_batch_single_identical(expected_max_diff=2e-3) + + @unittest.skip("Test not supported") + def test_callback_inputs(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_image_to_video.py b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_image_to_video.py new file mode 100644 index 0000000000000000000000000000000000000000..07a9142f2553795d65996e62b28151cc2cdab7b2 --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_image_to_video.py @@ -0,0 +1,381 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
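+ +# Covers the Wan 2.1 image-to-video pipeline and its first-last-frame (FLF2V) variant, which additionally +# conditions on a `last_image` input.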
+ +import tempfile +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import ( + AutoTokenizer, + CLIPImageProcessor, + CLIPVisionConfig, + CLIPVisionModelWithProjection, + T5EncoderModel, +) + +from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanImageToVideoPipeline, WanTransformer3DModel + +from ...testing_utils import enable_full_determinism, torch_device +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class WanImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = WanImageToVideoPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs", "height", "width"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + # TODO: impl FlowDPMSolverMultistepScheduler + scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=36, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + image_dim=4, + ) + + torch.manual_seed(0) + image_encoder_config = CLIPVisionConfig( + hidden_size=4, + projection_dim=4, + num_hidden_layers=2, + num_attention_heads=2, + image_size=32, + intermediate_size=16, + patch_size=1, + ) + image_encoder = CLIPVisionModelWithProjection(image_encoder_config) + + torch.manual_seed(0) + image_processor = CLIPImageProcessor(crop_size=32, size=32) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "image_encoder": image_encoder, + "image_processor": image_processor, + "transformer_2": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + image_height = 16 + image_width = 16 + image = Image.new("RGB", (image_width, image_height)) + inputs = { + "image": image, + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "height": image_height, + "width": image_width, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = 
pipe(**inputs).frames + generated_video = video[0] + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + + # fmt: off + expected_slice = torch.tensor([0.4525, 0.4525, 0.4497, 0.4536, 0.452, 0.4529, 0.454, 0.4535, 0.5072, 0.5527, 0.5165, 0.5244, 0.5481, 0.5282, 0.5208, 0.5214]) + # fmt: on + + generated_slice = generated_video.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3)) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + @unittest.skip("TODO: revisit failing as it requires a very high threshold to pass") + def test_inference_batch_single_identical(self): + pass + + # _optional_components include transformer, transformer_2 and image_encoder, image_processor, but only transformer_2 is optional for wan2.1 i2v pipeline + def test_save_load_optional_components(self, expected_max_difference=1e-4): + optional_component = "transformer_2" + + components = self.get_dummy_components() + components[optional_component] = None + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + for component in pipe_loaded.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + self.assertTrue( + getattr(pipe_loaded, optional_component) is None, + f"`{optional_component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(output.detach().cpu().numpy() - output_loaded.detach().cpu().numpy()).max() + self.assertLess(max_diff, expected_max_difference) + + +class WanFLFToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = WanImageToVideoPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs", "height", "width"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + # TODO: impl FlowDPMSolverMultistepScheduler + scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=36, + 
out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + image_dim=4, + pos_embed_seq_len=2 * (4 * 4 + 1), + ) + + torch.manual_seed(0) + image_encoder_config = CLIPVisionConfig( + hidden_size=4, + projection_dim=4, + num_hidden_layers=2, + num_attention_heads=2, + image_size=4, + intermediate_size=16, + patch_size=1, + ) + image_encoder = CLIPVisionModelWithProjection(image_encoder_config) + + torch.manual_seed(0) + image_processor = CLIPImageProcessor(crop_size=4, size=4) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + "image_encoder": image_encoder, + "image_processor": image_processor, + "transformer_2": None, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + image_height = 16 + image_width = 16 + image = Image.new("RGB", (image_width, image_height)) + last_image = Image.new("RGB", (image_width, image_height)) + inputs = { + "image": image, + "last_image": last_image, + "prompt": "dance monkey", + "negative_prompt": "negative", + "height": image_height, + "width": image_width, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "num_frames": 9, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + self.assertEqual(generated_video.shape, (9, 3, 16, 16)) + + # fmt: off + expected_slice = torch.tensor([0.4531, 0.4527, 0.4498, 0.4542, 0.4526, 0.4527, 0.4534, 0.4534, 0.5061, 0.5185, 0.5283, 0.5181, 0.5309, 0.5365, 0.5113, 0.5244]) + # fmt: on + + generated_slice = generated_video.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3)) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + @unittest.skip("TODO: revisit failing as it requires a very high threshold to pass") + def test_inference_batch_single_identical(self): + pass + + # _optional_components include transformer, transformer_2 and image_encoder, image_processor, but only transformer_2 is optional for wan2.1 FLFT2V pipeline + def test_save_load_optional_components(self, expected_max_difference=1e-4): + optional_component = "transformer_2" + + components = self.get_dummy_components() + components[optional_component] = None + pipe = self.pipeline_class(**components) + for component in pipe.components.values(): + if hasattr(component, "set_default_attn_processor"): + component.set_default_attn_processor() + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + generator_device = "cpu" + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output = pipe(**inputs)[0] + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir, safe_serialization=False) + pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) + for component in pipe_loaded.components.values(): + if hasattr(component, 
"set_default_attn_processor"): + component.set_default_attn_processor() + pipe_loaded.to(torch_device) + pipe_loaded.set_progress_bar_config(disable=None) + + self.assertTrue( + getattr(pipe_loaded, optional_component) is None, + f"`{optional_component}` did not stay set to None after loading.", + ) + + inputs = self.get_dummy_inputs(generator_device) + torch.manual_seed(0) + output_loaded = pipe_loaded(**inputs)[0] + + max_diff = np.abs(output.detach().cpu().numpy() - output_loaded.detach().cpu().numpy()).max() + self.assertLess(max_diff, expected_max_difference) diff --git a/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_vace.py b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_vace.py new file mode 100644 index 0000000000000000000000000000000000000000..ed13d5649dc34bbf09fdc953ef8ee2f67922482d --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_vace.py @@ -0,0 +1,213 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch +from PIL import Image +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanVACEPipeline, WanVACETransformer3DModel + +from ...testing_utils import enable_full_determinism +from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import PipelineTesterMixin + + +enable_full_determinism() + + +class WanVACEPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = WanVACEPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = TEXT_TO_IMAGE_BATCH_PARAMS + image_params = TEXT_TO_IMAGE_IMAGE_PARAMS + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = WanVACETransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=3, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + vace_layers=[0, 2], + vace_in_channels=96, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": 
tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + num_frames = 17 + height = 16 + width = 16 + + video = [Image.new("RGB", (height, width))] * num_frames + mask = [Image.new("L", (height, width), 0)] * num_frames + + inputs = { + "video": video, + "mask": mask, + "prompt": "dance monkey", + "negative_prompt": "negative", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "height": 16, + "width": 16, + "num_frames": num_frames, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames[0] + self.assertEqual(video.shape, (17, 3, 16, 16)) + + # fmt: off + expected_slice = [0.4523, 0.45198, 0.44872, 0.45326, 0.45211, 0.45258, 0.45344, 0.453, 0.52431, 0.52572, 0.50701, 0.5118, 0.53717, 0.53093, 0.50557, 0.51402] + # fmt: on + + video_slice = video.flatten() + video_slice = torch.cat([video_slice[:8], video_slice[-8:]]) + video_slice = [round(x, 5) for x in video_slice.tolist()] + self.assertTrue(np.allclose(video_slice, expected_slice, atol=1e-3)) + + def test_inference_with_single_reference_image(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["reference_images"] = Image.new("RGB", (16, 16)) + video = pipe(**inputs).frames[0] + self.assertEqual(video.shape, (17, 3, 16, 16)) + + # fmt: off + expected_slice = [0.45247, 0.45214, 0.44874, 0.45314, 0.45171, 0.45299, 0.45428, 0.45317, 0.51378, 0.52658, 0.53361, 0.52303, 0.46204, 0.50435, 0.52555, 0.51342] + # fmt: on + + video_slice = video.flatten() + video_slice = torch.cat([video_slice[:8], video_slice[-8:]]) + video_slice = [round(x, 5) for x in video_slice.tolist()] + self.assertTrue(np.allclose(video_slice, expected_slice, atol=1e-3)) + + def test_inference_with_multiple_reference_image(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["reference_images"] = [[Image.new("RGB", (16, 16))] * 2] + video = pipe(**inputs).frames[0] + self.assertEqual(video.shape, (17, 3, 16, 16)) + + # fmt: off + expected_slice = [0.45321, 0.45221, 0.44818, 0.45375, 0.45268, 0.4519, 0.45271, 0.45253, 0.51244, 0.52223, 0.51253, 0.51321, 0.50743, 0.51177, 0.51626, 0.50983] + # fmt: on + + video_slice = video.flatten() + video_slice = torch.cat([video_slice[:8], video_slice[-8:]]) + video_slice = [round(x, 5) for x in video_slice.tolist()] + self.assertTrue(np.allclose(video_slice, expected_slice, atol=1e-3)) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + @unittest.skip("Errors out because passing multiple prompts at once is not yet supported by this pipeline.") + def test_encode_prompt_works_in_isolation(self): + pass + + @unittest.skip("Batching is not yet supported with this pipeline") + def test_inference_batch_consistent(self): + pass + + @unittest.skip("Batching is not yet 
supported with this pipeline") + def test_inference_batch_single_identical(self): + return super().test_inference_batch_single_identical() + + @unittest.skip( + "AutoencoderKLWan encoded latents are always in FP32. This test is not designed to handle mixed dtype inputs" + ) + def test_float16_inference(self): + pass + + @unittest.skip( + "AutoencoderKLWan encoded latents are always in FP32. This test is not designed to handle mixed dtype inputs" + ) + def test_save_load_float16(self): + pass diff --git a/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_video_to_video.py b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_video_to_video.py new file mode 100644 index 0000000000000000000000000000000000000000..27ada121ca485db206f08d761f000dd087147adc --- /dev/null +++ b/pythonProject/diffusers-main/tests/pipelines/wan/test_wan_video_to_video.py @@ -0,0 +1,149 @@ +# Copyright 2025 The HuggingFace Team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import torch +from PIL import Image +from transformers import AutoTokenizer, T5EncoderModel + +from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanTransformer3DModel, WanVideoToVideoPipeline + +from ...testing_utils import ( + enable_full_determinism, +) +from ..pipeline_params import TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS +from ..test_pipelines_common import ( + PipelineTesterMixin, +) + + +enable_full_determinism() + + +class WanVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + pipeline_class = WanVideoToVideoPipeline + params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"} + batch_params = frozenset(["video", "prompt", "negative_prompt"]) + image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS + required_optional_params = frozenset( + [ + "num_inference_steps", + "generator", + "latents", + "return_dict", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + ) + test_xformers_attention = False + supports_dduf = False + + def get_dummy_components(self): + torch.manual_seed(0) + vae = AutoencoderKLWan( + base_dim=3, + z_dim=16, + dim_mult=[1, 1, 1, 1], + num_res_blocks=1, + temperal_downsample=[False, True, True], + ) + + torch.manual_seed(0) + scheduler = UniPCMultistepScheduler(flow_shift=3.0) + text_encoder = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5") + tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + + torch.manual_seed(0) + transformer = WanTransformer3DModel( + patch_size=(1, 2, 2), + num_attention_heads=2, + attention_head_dim=12, + in_channels=16, + out_channels=16, + text_dim=32, + freq_dim=256, + ffn_dim=32, + num_layers=2, + cross_attn_norm=True, + qk_norm="rms_norm_across_heads", + rope_max_seq_len=32, + ) + + components = { + "transformer": transformer, + "vae": vae, + "scheduler": scheduler, + "text_encoder": text_encoder, + "tokenizer": tokenizer, + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = 
torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + video = [Image.new("RGB", (16, 16))] * 17 + inputs = { + "video": video, + "prompt": "dance monkey", + "negative_prompt": "negative", # TODO + "generator": generator, + "num_inference_steps": 4, + "guidance_scale": 6.0, + "height": 16, + "width": 16, + "max_sequence_length": 16, + "output_type": "pt", + } + return inputs + + def test_inference(self): + device = "cpu" + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + video = pipe(**inputs).frames + generated_video = video[0] + self.assertEqual(generated_video.shape, (17, 3, 16, 16)) + + # fmt: off + expected_slice = torch.tensor([0.4522, 0.4534, 0.4532, 0.4553, 0.4526, 0.4538, 0.4533, 0.4547, 0.513, 0.5176, 0.5286, 0.4958, 0.4955, 0.5381, 0.5154, 0.5195]) + # fmt:on + + generated_slice = generated_video.flatten() + generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]]) + self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3)) + + @unittest.skip("Test not supported") + def test_attention_slicing_forward_pass(self): + pass + + @unittest.skip( + "WanVideoToVideoPipeline has to run in mixed precision. Casting the entire pipeline will result in errors" + ) + def test_float16_inference(self): + pass + + @unittest.skip( + "WanVideoToVideoPipeline has to run in mixed precision. Save/Load the entire pipeline in FP16 will result in errors" + ) + def test_save_load_float16(self): + pass diff --git a/pythonProject/diffusers-main/tests/quantization/__init__.py b/pythonProject/diffusers-main/tests/quantization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/quantization/bnb/README.md b/pythonProject/diffusers-main/tests/quantization/bnb/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f1585581597d77c0f6d465d84d9d836741dd1671 --- /dev/null +++ b/pythonProject/diffusers-main/tests/quantization/bnb/README.md @@ -0,0 +1,44 @@ +The tests here are adapted from [`transformers` tests](https://github.com/huggingface/transformers/tree/409fcfdfccde77a14b7cc36972b774cabc371ae1/tests/quantization/bnb). + +They were conducted on the `audace` machine, using a single RTX 4090. Below is `nvidia-smi`: + +```bash ++-----------------------------------------------------------------------------------------+ +| NVIDIA-SMI 550.90.07 Driver Version: 550.90.07 CUDA Version: 12.4 | +|-----------------------------------------+------------------------+----------------------+ +| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. 
|
+|=========================================+========================+======================|
+|   0  NVIDIA GeForce RTX 4090        Off |   00000000:01:00.0 Off |                  Off |
+| 30%   55C    P0             61W /  450W |       1MiB /  24564MiB |      2%      Default |
+|                                         |                        |                  N/A |
++-----------------------------------------+------------------------+----------------------+
+|   1  NVIDIA GeForce RTX 4090        Off |   00000000:13:00.0 Off |                  Off |
+| 30%   51C    P0             60W /  450W |       1MiB /  24564MiB |      0%      Default |
+|                                         |                        |                  N/A |
++-----------------------------------------+------------------------+----------------------+
+```
+
+`diffusers-cli`:
+
+```bash
+- 🤗 Diffusers version: 0.31.0.dev0
+- Platform: Linux-5.15.0-117-generic-x86_64-with-glibc2.35
+- Running on Google Colab?: No
+- Python version: 3.10.12
+- PyTorch version (GPU?): 2.5.0.dev20240818+cu124 (True)
+- Flax version (CPU?/GPU?/TPU?): not installed (NA)
+- Jax version: not installed
+- JaxLib version: not installed
+- Huggingface_hub version: 0.24.5
+- Transformers version: 4.44.2
+- Accelerate version: 0.34.0.dev0
+- PEFT version: 0.12.0
+- Bitsandbytes version: 0.43.3
+- Safetensors version: 0.4.4
+- xFormers version: not installed
+- Accelerator: NVIDIA GeForce RTX 4090, 24564 MiB
+NVIDIA GeForce RTX 4090, 24564 MiB
+- Using GPU in script?: Yes
+```
\ No newline at end of file
diff --git a/pythonProject/diffusers-main/tests/quantization/bnb/__init__.py b/pythonProject/diffusers-main/tests/quantization/bnb/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/pythonProject/diffusers-main/tests/quantization/bnb/test_4bit.py b/pythonProject/diffusers-main/tests/quantization/bnb/test_4bit.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1da8f1ece78021c4c68de461e57cd492ad19db7
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/quantization/bnb/test_4bit.py
@@ -0,0 +1,895 @@
+# coding=utf-8
+# Copyright 2025 The HuggingFace Team Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import gc
+import os
+import tempfile
+import unittest
+
+import numpy as np
+import pytest
+import safetensors.torch
+from huggingface_hub import hf_hub_download
+from PIL import Image
+
+from diffusers import (
+    BitsAndBytesConfig,
+    DiffusionPipeline,
+    FluxControlPipeline,
+    FluxTransformer2DModel,
+    SD3Transformer2DModel,
+)
+from diffusers.quantizers import PipelineQuantizationConfig
+from diffusers.utils import is_accelerate_version, logging
+
+from ...testing_utils import (
+    CaptureLogger,
+    backend_empty_cache,
+    is_bitsandbytes_available,
+    is_torch_available,
+    is_transformers_available,
+    load_pt,
+    numpy_cosine_similarity_distance,
+    require_accelerate,
+    require_bitsandbytes_version_greater,
+    require_peft_backend,
+    require_torch,
+    require_torch_accelerator,
+    require_torch_version_greater,
+    require_transformers_version_greater,
+    slow,
+    torch_device,
+)
+from ..test_torch_compile_utils import QuantCompileTests
+
+
+def get_some_linear_layer(model):
+    if model.__class__.__name__ in ["SD3Transformer2DModel", "FluxTransformer2DModel"]:
+        return model.transformer_blocks[0].attn.to_q
+    else:
+        raise NotImplementedError("Don't know what layer to retrieve here.")
+
+
+if is_transformers_available():
+    from transformers import BitsAndBytesConfig as BnbConfig
+    from transformers import T5EncoderModel
+
+if is_torch_available():
+    import torch
+
+    from ..utils import LoRALayer, get_memory_consumption_stat
+
+
+if is_bitsandbytes_available():
+    import bitsandbytes as bnb
+
+    from diffusers.quantizers.bitsandbytes.utils import replace_with_bnb_linear
+
+
+@require_bitsandbytes_version_greater("0.43.2")
+@require_accelerate
+@require_torch
+@require_torch_accelerator
+@slow
+class Base4bitTests(unittest.TestCase):
+    # We need to test on relatively large models (i.e. >1B parameters, otherwise the quantization may not work as expected)
+    # Therefore here we use only SD3 to test our module
+    model_name = "stabilityai/stable-diffusion-3-medium-diffusers"
+
+    # This was obtained on audace so the number might slightly change
+    expected_rel_difference = 3.69
+
+    expected_memory_saving_ratio = 0.8
+
+    prompt = "a beautiful sunset amidst the mountains."
+ num_inference_steps = 10 + seed = 0 + + @classmethod + def setUpClass(cls): + cls.is_deterministic_enabled = torch.are_deterministic_algorithms_enabled() + if not cls.is_deterministic_enabled: + torch.use_deterministic_algorithms(True) + + @classmethod + def tearDownClass(cls): + if not cls.is_deterministic_enabled: + torch.use_deterministic_algorithms(False) + + def get_dummy_inputs(self): + prompt_embeds = load_pt( + "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/prompt_embeds.pt", + torch_device, + ) + pooled_prompt_embeds = load_pt( + "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/pooled_prompt_embeds.pt", + torch_device, + ) + latent_model_input = load_pt( + "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/latent_model_input.pt", + torch_device, + ) + + input_dict_for_transformer = { + "hidden_states": latent_model_input, + "encoder_hidden_states": prompt_embeds, + "pooled_projections": pooled_prompt_embeds, + "timestep": torch.Tensor([1.0]), + "return_dict": False, + } + return input_dict_for_transformer + + +class BnB4BitBasicTests(Base4bitTests): + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + # Models + self.model_fp16 = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", torch_dtype=torch.float16 + ) + nf4_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.float16, + ) + self.model_4bit = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=nf4_config, device_map=torch_device + ) + + def tearDown(self): + if hasattr(self, "model_fp16"): + del self.model_fp16 + if hasattr(self, "model_4bit"): + del self.model_4bit + + gc.collect() + backend_empty_cache(torch_device) + + def test_quantization_num_parameters(self): + r""" + Test if the number of returned parameters is correct + """ + num_params_4bit = self.model_4bit.num_parameters() + num_params_fp16 = self.model_fp16.num_parameters() + + self.assertEqual(num_params_4bit, num_params_fp16) + + def test_quantization_config_json_serialization(self): + r""" + A simple test to check if the quantization config is correctly serialized and deserialized + """ + config = self.model_4bit.config + + self.assertTrue("quantization_config" in config) + + _ = config["quantization_config"].to_dict() + _ = config["quantization_config"].to_diff_dict() + + _ = config["quantization_config"].to_json_string() + + def test_memory_footprint(self): + r""" + A simple test to check if the model conversion has been done correctly by checking on the + memory footprint of the converted model and the class type of the linear layers of the converted models + """ + mem_fp16 = self.model_fp16.get_memory_footprint() + mem_4bit = self.model_4bit.get_memory_footprint() + + self.assertAlmostEqual(mem_fp16 / mem_4bit, self.expected_rel_difference, delta=1e-2) + linear = get_some_linear_layer(self.model_4bit) + self.assertTrue(linear.weight.__class__ == bnb.nn.Params4bit) + + def test_model_memory_usage(self): + # Delete to not let anything interfere. + del self.model_4bit, self.model_fp16 + + # Re-instantiate. 
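+        # The fp16 baseline is measured and freed before the 4-bit model is loaded,
+        # so the two memory readings do not interfere with each other.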
+        inputs = self.get_dummy_inputs()
+        inputs = {
+            k: v.to(device=torch_device, dtype=torch.float16) for k, v in inputs.items() if not isinstance(v, bool)
+        }
+        model_fp16 = SD3Transformer2DModel.from_pretrained(
+            self.model_name, subfolder="transformer", torch_dtype=torch.float16
+        ).to(torch_device)
+        unquantized_model_memory = get_memory_consumption_stat(model_fp16, inputs)
+        del model_fp16
+
+        nf4_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16,
+        )
+        model_4bit = SD3Transformer2DModel.from_pretrained(
+            self.model_name, subfolder="transformer", quantization_config=nf4_config, torch_dtype=torch.float16
+        )
+        quantized_model_memory = get_memory_consumption_stat(model_4bit, inputs)
+        assert unquantized_model_memory / quantized_model_memory >= self.expected_memory_saving_ratio
+
+    def test_original_dtype(self):
+        r"""
+        A simple test to check if the model successfully stores the original dtype
+        """
+        self.assertTrue("_pre_quantization_dtype" in self.model_4bit.config)
+        self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config)
+        self.assertTrue(self.model_4bit.config["_pre_quantization_dtype"] == torch.float16)
+
+    def test_keep_modules_in_fp32(self):
+        r"""
+        A simple test to check if the modules under `_keep_in_fp32_modules` are kept in fp32.
+        Also ensures that inference works.
+        """
+        fp32_modules = SD3Transformer2DModel._keep_in_fp32_modules
+        SD3Transformer2DModel._keep_in_fp32_modules = ["proj_out"]
+
+        nf4_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16,
+        )
+        model = SD3Transformer2DModel.from_pretrained(
+            self.model_name, subfolder="transformer", quantization_config=nf4_config, device_map=torch_device
+        )
+
+        for name, module in model.named_modules():
+            if isinstance(module, torch.nn.Linear):
+                if name in model._keep_in_fp32_modules:
+                    self.assertTrue(module.weight.dtype == torch.float32)
+                else:
+                    # 4-bit parameters are packed in uint8 variables
+                    self.assertTrue(module.weight.dtype == torch.uint8)
+
+        # test if inference works.
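+        # Run the forward pass under autocast so fp16 activations can flow through
+        # both the uint8-packed 4-bit linears and the fp32 `proj_out` without manual casts.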
+        with torch.no_grad(), torch.amp.autocast(torch_device, dtype=torch.float16):
+            input_dict_for_transformer = self.get_dummy_inputs()
+            model_inputs = {
+                k: v.to(device=torch_device) for k, v in input_dict_for_transformer.items() if not isinstance(v, bool)
+            }
+            model_inputs.update({k: v for k, v in input_dict_for_transformer.items() if k not in model_inputs})
+            _ = model(**model_inputs)
+
+        SD3Transformer2DModel._keep_in_fp32_modules = fp32_modules
+
+    def test_linear_are_4bit(self):
+        r"""
+        A simple test to check if the model conversion has been done correctly by checking on the
+        memory footprint of the converted model and the class type of the linear layers of the converted models
+        """
+        self.model_fp16.get_memory_footprint()
+        self.model_4bit.get_memory_footprint()
+
+        for name, module in self.model_4bit.named_modules():
+            if isinstance(module, torch.nn.Linear):
+                if name not in ["proj_out"]:
+                    # 4-bit parameters are packed in uint8 variables
+                    self.assertTrue(module.weight.dtype == torch.uint8)
+
+    def test_config_from_pretrained(self):
+        transformer_4bit = FluxTransformer2DModel.from_pretrained(
+            "hf-internal-testing/flux.1-dev-nf4-pkg", subfolder="transformer"
+        )
+        linear = get_some_linear_layer(transformer_4bit)
+        self.assertTrue(linear.weight.__class__ == bnb.nn.Params4bit)
+        self.assertTrue(hasattr(linear.weight, "quant_state"))
+        self.assertTrue(linear.weight.quant_state.__class__ == bnb.functional.QuantState)
+
+    def test_device_assignment(self):
+        mem_before = self.model_4bit.get_memory_footprint()
+
+        # Move to CPU
+        self.model_4bit.to("cpu")
+        self.assertEqual(self.model_4bit.device.type, "cpu")
+        self.assertAlmostEqual(self.model_4bit.get_memory_footprint(), mem_before)
+
+        # Move back to CUDA device
+        for device in [0, f"{torch_device}", f"{torch_device}:0", "call()"]:
+            if device == "call()":
+                self.model_4bit.to(f"{torch_device}:0")
+            else:
+                self.model_4bit.to(device)
+            self.assertEqual(self.model_4bit.device, torch.device(0))
+            self.assertAlmostEqual(self.model_4bit.get_memory_footprint(), mem_before)
+            self.model_4bit.to("cpu")
+
+    def test_device_and_dtype_assignment(self):
+        r"""
+        Test whether trying to cast (or assigning a device to) a model after converting it in 4-bit will throw an error.
+        Also checks that other models are cast correctly. Device placement, however, is supported.
+ """ + with self.assertRaises(ValueError): + # Tries with a `dtype` + self.model_4bit.to(torch.float16) + + with self.assertRaises(ValueError): + # Tries with a `device` and `dtype` + self.model_4bit.to(device=f"{torch_device}:0", dtype=torch.float16) + + with self.assertRaises(ValueError): + # Tries with a cast + self.model_4bit.float() + + with self.assertRaises(ValueError): + # Tries with a cast + self.model_4bit.half() + + # This should work + self.model_4bit.to(torch_device) + + # Test if we did not break anything + self.model_fp16 = self.model_fp16.to(dtype=torch.float32, device=torch_device) + input_dict_for_transformer = self.get_dummy_inputs() + model_inputs = { + k: v.to(dtype=torch.float32, device=torch_device) + for k, v in input_dict_for_transformer.items() + if not isinstance(v, bool) + } + model_inputs.update({k: v for k, v in input_dict_for_transformer.items() if k not in model_inputs}) + with torch.no_grad(): + _ = self.model_fp16(**model_inputs) + + # Check this does not throw an error + _ = self.model_fp16.to("cpu") + + # Check this does not throw an error + _ = self.model_fp16.half() + + # Check this does not throw an error + _ = self.model_fp16.float() + + # Check that this does not throw an error + _ = self.model_fp16.to(torch_device) + + def test_bnb_4bit_wrong_config(self): + r""" + Test whether creating a bnb config with unsupported values leads to errors. + """ + with self.assertRaises(ValueError): + _ = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_storage="add") + + def test_bnb_4bit_errors_loading_incorrect_state_dict(self): + r""" + Test if loading with an incorrect state dict raises an error. + """ + with tempfile.TemporaryDirectory() as tmpdirname: + nf4_config = BitsAndBytesConfig(load_in_4bit=True) + model_4bit = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=nf4_config, device_map=torch_device + ) + model_4bit.save_pretrained(tmpdirname) + del model_4bit + + with self.assertRaises(ValueError) as err_context: + state_dict = safetensors.torch.load_file( + os.path.join(tmpdirname, "diffusion_pytorch_model.safetensors") + ) + + # corrupt the state dict + key_to_target = "context_embedder.weight" # can be other keys too. + compatible_param = state_dict[key_to_target] + corrupted_param = torch.randn(compatible_param.shape[0] - 1, 1) + state_dict[key_to_target] = bnb.nn.Params4bit(corrupted_param, requires_grad=False) + safetensors.torch.save_file( + state_dict, os.path.join(tmpdirname, "diffusion_pytorch_model.safetensors") + ) + + _ = SD3Transformer2DModel.from_pretrained(tmpdirname) + + assert key_to_target in str(err_context.exception) + + def test_bnb_4bit_logs_warning_for_no_quantization(self): + model_with_no_linear = torch.nn.Sequential(torch.nn.Conv2d(4, 4, 3), torch.nn.ReLU()) + quantization_config = BitsAndBytesConfig(load_in_4bit=True) + logger = logging.get_logger("diffusers.quantizers.bitsandbytes.utils") + logger.setLevel(30) + with CaptureLogger(logger) as cap_logger: + _ = replace_with_bnb_linear(model_with_no_linear, quantization_config=quantization_config) + assert ( + "You are loading your model in 8bit or 4bit but no linear modules were found in your model." 
+            in cap_logger.out
+        )
+
+
+class BnB4BitTrainingTests(Base4bitTests):
+    def setUp(self):
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+        nf4_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16,
+        )
+        self.model_4bit = SD3Transformer2DModel.from_pretrained(
+            self.model_name, subfolder="transformer", quantization_config=nf4_config, device_map=torch_device
+        )
+
+    def test_training(self):
+        # Step 1: freeze all parameters
+        for param in self.model_4bit.parameters():
+            param.requires_grad = False  # freeze the model - train adapters later
+            if param.ndim == 1:
+                # cast the small parameters (e.g. layernorm) to fp32 for stability
+                param.data = param.data.to(torch.float32)
+
+        # Step 2: add adapters
+        for _, module in self.model_4bit.named_modules():
+            if "Attention" in repr(type(module)):
+                module.to_k = LoRALayer(module.to_k, rank=4)
+                module.to_q = LoRALayer(module.to_q, rank=4)
+                module.to_v = LoRALayer(module.to_v, rank=4)
+
+        # Step 3: dummy batch
+        input_dict_for_transformer = self.get_dummy_inputs()
+        model_inputs = {
+            k: v.to(device=torch_device) for k, v in input_dict_for_transformer.items() if not isinstance(v, bool)
+        }
+        model_inputs.update({k: v for k, v in input_dict_for_transformer.items() if k not in model_inputs})
+
+        # Step 4: Check if the gradient is not None
+        with torch.amp.autocast(torch_device, dtype=torch.float16):
+            out = self.model_4bit(**model_inputs)[0]
+            out.norm().backward()
+
+        for module in self.model_4bit.modules():
+            if isinstance(module, LoRALayer):
+                self.assertTrue(module.adapter[1].weight.grad is not None)
+                self.assertTrue(module.adapter[1].weight.grad.norm().item() > 0)
+
+
+@require_transformers_version_greater("4.44.0")
+class SlowBnb4BitTests(Base4bitTests):
+    def setUp(self) -> None:
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+        nf4_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16,
+        )
+        model_4bit = SD3Transformer2DModel.from_pretrained(
+            self.model_name, subfolder="transformer", quantization_config=nf4_config, device_map=torch_device
+        )
+        self.pipeline_4bit = DiffusionPipeline.from_pretrained(
+            self.model_name, transformer=model_4bit, torch_dtype=torch.float16
+        )
+        self.pipeline_4bit.enable_model_cpu_offload()
+
+    def tearDown(self):
+        del self.pipeline_4bit
+
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_quality(self):
+        output = self.pipeline_4bit(
+            prompt=self.prompt,
+            num_inference_steps=self.num_inference_steps,
+            generator=torch.manual_seed(self.seed),
+            output_type="np",
+        ).images
+
+        out_slice = output[0, -3:, -3:, -1].flatten()
+        expected_slice = np.array([0.1123, 0.1296, 0.1609, 0.1042, 0.1230, 0.1274, 0.0928, 0.1165, 0.1216])
+
+        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
+        self.assertTrue(max_diff < 1e-2)
+
+    def test_generate_quality_dequantize(self):
+        r"""
+        Test that loading the model and dequantizing it produces correct results.
+ """ + self.pipeline_4bit.transformer.dequantize() + output = self.pipeline_4bit( + prompt=self.prompt, + num_inference_steps=self.num_inference_steps, + generator=torch.manual_seed(self.seed), + output_type="np", + ).images + + out_slice = output[0, -3:, -3:, -1].flatten() + expected_slice = np.array([0.1216, 0.1387, 0.1584, 0.1152, 0.1318, 0.1282, 0.1062, 0.1226, 0.1228]) + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-3) + + # Since we offloaded the `pipeline_4bit.transformer` to CPU (result of `enable_model_cpu_offload()), check + # the following. + self.assertTrue(self.pipeline_4bit.transformer.device.type == "cpu") + # calling it again shouldn't be a problem + _ = self.pipeline_4bit( + prompt=self.prompt, + num_inference_steps=2, + generator=torch.manual_seed(self.seed), + output_type="np", + ).images + + def test_moving_to_cpu_throws_warning(self): + nf4_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.float16, + ) + model_4bit = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=nf4_config, device_map=torch_device + ) + + logger = logging.get_logger("diffusers.pipelines.pipeline_utils") + logger.setLevel(30) + with CaptureLogger(logger) as cap_logger: + # Because `model.dtype` will return torch.float16 as SD3 transformer has + # a conv layer as the first layer. + _ = DiffusionPipeline.from_pretrained( + self.model_name, transformer=model_4bit, torch_dtype=torch.float16 + ).to("cpu") + + assert "Pipelines loaded with `dtype=torch.float16`" in cap_logger.out + + @pytest.mark.xfail( + condition=is_accelerate_version("<=", "1.1.1"), + reason="Test will pass after https://github.com/huggingface/accelerate/pull/3223 is in a release.", + strict=True, + ) + def test_pipeline_cuda_placement_works_with_nf4(self): + transformer_nf4_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.float16, + ) + transformer_4bit = SD3Transformer2DModel.from_pretrained( + self.model_name, + subfolder="transformer", + quantization_config=transformer_nf4_config, + torch_dtype=torch.float16, + device_map=torch_device, + ) + text_encoder_3_nf4_config = BnbConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.float16, + ) + text_encoder_3_4bit = T5EncoderModel.from_pretrained( + self.model_name, + subfolder="text_encoder_3", + quantization_config=text_encoder_3_nf4_config, + torch_dtype=torch.float16, + device_map=torch_device, + ) + # CUDA device placement works. + pipeline_4bit = DiffusionPipeline.from_pretrained( + self.model_name, + transformer=transformer_4bit, + text_encoder_3=text_encoder_3_4bit, + torch_dtype=torch.float16, + ).to(torch_device) + + # Check if inference works. + _ = pipeline_4bit(self.prompt, max_sequence_length=20, num_inference_steps=2) + + del pipeline_4bit + + def test_device_map(self): + """ + Test if the quantized model is working properly with "auto". + cpu/disk offloading as well doesn't work with bnb. 
+ """ + + def get_dummy_tensor_inputs(device=None, seed: int = 0): + batch_size = 1 + num_latent_channels = 4 + num_image_channels = 3 + height = width = 4 + sequence_length = 48 + embedding_dim = 32 + + torch.manual_seed(seed) + hidden_states = torch.randn((batch_size, height * width, num_latent_channels)).to( + device, dtype=torch.bfloat16 + ) + torch.manual_seed(seed) + encoder_hidden_states = torch.randn((batch_size, sequence_length, embedding_dim)).to( + device, dtype=torch.bfloat16 + ) + + torch.manual_seed(seed) + pooled_prompt_embeds = torch.randn((batch_size, embedding_dim)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + text_ids = torch.randn((sequence_length, num_image_channels)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + image_ids = torch.randn((height * width, num_image_channels)).to(device, dtype=torch.bfloat16) + + timestep = torch.tensor([1.0]).to(device, dtype=torch.bfloat16).expand(batch_size) + + return { + "hidden_states": hidden_states, + "encoder_hidden_states": encoder_hidden_states, + "pooled_projections": pooled_prompt_embeds, + "txt_ids": text_ids, + "img_ids": image_ids, + "timestep": timestep, + } + + inputs = get_dummy_tensor_inputs(torch_device) + expected_slice = np.array( + [0.47070312, 0.00390625, -0.03662109, -0.19628906, -0.53125, 0.5234375, -0.17089844, -0.59375, 0.578125] + ) + + # non sharded + quantization_config = BitsAndBytesConfig( + load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16 + ) + quantized_model = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/tiny-flux-pipe", + subfolder="transformer", + quantization_config=quantization_config, + device_map="auto", + torch_dtype=torch.bfloat16, + ) + + weight = quantized_model.transformer_blocks[0].ff.net[2].weight + self.assertTrue(isinstance(weight, bnb.nn.modules.Params4bit)) + + output = quantized_model(**inputs)[0] + output_slice = output.flatten()[-9:].detach().float().cpu().numpy() + self.assertTrue(numpy_cosine_similarity_distance(output_slice, expected_slice) < 1e-3) + + # sharded + + quantization_config = BitsAndBytesConfig( + load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16 + ) + quantized_model = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/tiny-flux-sharded", + subfolder="transformer", + quantization_config=quantization_config, + device_map="auto", + torch_dtype=torch.bfloat16, + ) + + weight = quantized_model.transformer_blocks[0].ff.net[2].weight + self.assertTrue(isinstance(weight, bnb.nn.modules.Params4bit)) + + output = quantized_model(**inputs)[0] + output_slice = output.flatten()[-9:].detach().float().cpu().numpy() + + self.assertTrue(numpy_cosine_similarity_distance(output_slice, expected_slice) < 1e-3) + + +@require_transformers_version_greater("4.44.0") +class SlowBnb4BitFluxTests(Base4bitTests): + def setUp(self) -> None: + gc.collect() + backend_empty_cache(torch_device) + + model_id = "hf-internal-testing/flux.1-dev-nf4-pkg" + t5_4bit = T5EncoderModel.from_pretrained(model_id, subfolder="text_encoder_2") + transformer_4bit = FluxTransformer2DModel.from_pretrained(model_id, subfolder="transformer") + self.pipeline_4bit = DiffusionPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", + text_encoder_2=t5_4bit, + transformer=transformer_4bit, + torch_dtype=torch.float16, + ) + self.pipeline_4bit.enable_model_cpu_offload() + + def tearDown(self): + del self.pipeline_4bit + + gc.collect() + backend_empty_cache(torch_device) + + def 
test_quality(self): + # keep the resolution and max tokens to a lower number for faster execution. + output = self.pipeline_4bit( + prompt=self.prompt, + num_inference_steps=self.num_inference_steps, + generator=torch.manual_seed(self.seed), + height=256, + width=256, + max_sequence_length=64, + output_type="np", + ).images + + out_slice = output[0, -3:, -3:, -1].flatten() + expected_slice = np.array([0.0583, 0.0586, 0.0632, 0.0815, 0.0813, 0.0947, 0.1040, 0.1145, 0.1265]) + + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-3) + + @require_peft_backend + def test_lora_loading(self): + self.pipeline_4bit.load_lora_weights( + hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"), adapter_name="hyper-sd" + ) + self.pipeline_4bit.set_adapters("hyper-sd", adapter_weights=0.125) + + output = self.pipeline_4bit( + prompt=self.prompt, + height=256, + width=256, + max_sequence_length=64, + output_type="np", + num_inference_steps=8, + generator=torch.Generator().manual_seed(42), + ).images + out_slice = output[0, -3:, -3:, -1].flatten() + expected_slice = np.array([0.5347, 0.5342, 0.5283, 0.5093, 0.4988, 0.5093, 0.5044, 0.5015, 0.4946]) + + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-3) + + +@require_transformers_version_greater("4.44.0") +@require_peft_backend +class SlowBnb4BitFluxControlWithLoraTests(Base4bitTests): + def setUp(self) -> None: + gc.collect() + backend_empty_cache(torch_device) + + self.pipeline_4bit = FluxControlPipeline.from_pretrained("eramth/flux-4bit", torch_dtype=torch.float16) + self.pipeline_4bit.enable_model_cpu_offload() + + def tearDown(self): + del self.pipeline_4bit + + gc.collect() + backend_empty_cache(torch_device) + + def test_lora_loading(self): + self.pipeline_4bit.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora") + + output = self.pipeline_4bit( + prompt=self.prompt, + control_image=Image.new(mode="RGB", size=(256, 256)), + height=256, + width=256, + max_sequence_length=64, + output_type="np", + num_inference_steps=8, + generator=torch.Generator().manual_seed(42), + ).images + out_slice = output[0, -3:, -3:, -1].flatten() + expected_slice = np.array([0.1636, 0.1675, 0.1982, 0.1743, 0.1809, 0.1936, 0.1743, 0.2095, 0.2139]) + + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-3, msg=f"{out_slice=} != {expected_slice=}") + + +@slow +class BaseBnb4BitSerializationTests(Base4bitTests): + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def test_serialization(self, quant_type="nf4", double_quant=True, safe_serialization=True): + r""" + Test whether it is possible to serialize a model in 4-bit. Uses most typical params as default. + See ExtendedSerializationTest class for more params combinations. 
+ """ + + self.quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type=quant_type, + bnb_4bit_use_double_quant=double_quant, + bnb_4bit_compute_dtype=torch.bfloat16, + ) + model_0 = SD3Transformer2DModel.from_pretrained( + self.model_name, + subfolder="transformer", + quantization_config=self.quantization_config, + device_map=torch_device, + ) + self.assertTrue("_pre_quantization_dtype" in model_0.config) + with tempfile.TemporaryDirectory() as tmpdirname: + model_0.save_pretrained(tmpdirname, safe_serialization=safe_serialization) + + config = SD3Transformer2DModel.load_config(tmpdirname) + self.assertTrue("quantization_config" in config) + self.assertTrue("_pre_quantization_dtype" not in config) + + model_1 = SD3Transformer2DModel.from_pretrained(tmpdirname) + + # checking quantized linear module weight + linear = get_some_linear_layer(model_1) + self.assertTrue(linear.weight.__class__ == bnb.nn.Params4bit) + self.assertTrue(hasattr(linear.weight, "quant_state")) + self.assertTrue(linear.weight.quant_state.__class__ == bnb.functional.QuantState) + + # checking memory footpring + self.assertAlmostEqual(model_0.get_memory_footprint() / model_1.get_memory_footprint(), 1, places=2) + + # Matching all parameters and their quant_state items: + d0 = dict(model_0.named_parameters()) + d1 = dict(model_1.named_parameters()) + self.assertTrue(d0.keys() == d1.keys()) + + for k in d0.keys(): + self.assertTrue(d0[k].shape == d1[k].shape) + self.assertTrue(d0[k].device.type == d1[k].device.type) + self.assertTrue(d0[k].device == d1[k].device) + self.assertTrue(d0[k].dtype == d1[k].dtype) + self.assertTrue(torch.equal(d0[k], d1[k].to(d0[k].device))) + + if isinstance(d0[k], bnb.nn.modules.Params4bit): + for v0, v1 in zip( + d0[k].quant_state.as_dict().values(), + d1[k].quant_state.as_dict().values(), + ): + if isinstance(v0, torch.Tensor): + self.assertTrue(torch.equal(v0, v1.to(v0.device))) + else: + self.assertTrue(v0 == v1) + + # comparing forward() outputs + dummy_inputs = self.get_dummy_inputs() + inputs = {k: v.to(torch_device) for k, v in dummy_inputs.items() if isinstance(v, torch.Tensor)} + inputs.update({k: v for k, v in dummy_inputs.items() if k not in inputs}) + out_0 = model_0(**inputs)[0] + out_1 = model_1(**inputs)[0] + self.assertTrue(torch.equal(out_0, out_1)) + + +class ExtendedSerializationTest(BaseBnb4BitSerializationTests): + """ + tests more combinations of parameters + """ + + def test_nf4_single_unsafe(self): + self.test_serialization(quant_type="nf4", double_quant=False, safe_serialization=False) + + def test_nf4_single_safe(self): + self.test_serialization(quant_type="nf4", double_quant=False, safe_serialization=True) + + def test_nf4_double_unsafe(self): + self.test_serialization(quant_type="nf4", double_quant=True, safe_serialization=False) + + # nf4 double safetensors quantization is tested in test_serialization() method from the parent class + + def test_fp4_single_unsafe(self): + self.test_serialization(quant_type="fp4", double_quant=False, safe_serialization=False) + + def test_fp4_single_safe(self): + self.test_serialization(quant_type="fp4", double_quant=False, safe_serialization=True) + + def test_fp4_double_unsafe(self): + self.test_serialization(quant_type="fp4", double_quant=True, safe_serialization=False) + + def test_fp4_double_safe(self): + self.test_serialization(quant_type="fp4", double_quant=True, safe_serialization=True) + + +@require_torch_version_greater("2.7.1") +@require_bitsandbytes_version_greater("0.45.5") +class 
+class Bnb4BitCompileTests(QuantCompileTests, unittest.TestCase):
+    @property
+    def quantization_config(self):
+        return PipelineQuantizationConfig(
+            quant_backend="bitsandbytes_4bit",
+            quant_kwargs={
+                "load_in_4bit": True,
+                "bnb_4bit_quant_type": "nf4",
+                "bnb_4bit_compute_dtype": torch.bfloat16,
+            },
+            components_to_quantize=["transformer", "text_encoder_2"],
+        )
+
+    @require_bitsandbytes_version_greater("0.46.1")
+    def test_torch_compile(self):
+        torch._dynamo.config.capture_dynamic_output_shape_ops = True
+        super().test_torch_compile()
+
+    def test_torch_compile_with_group_offload_leaf(self):
+        super()._test_torch_compile_with_group_offload_leaf(use_stream=True)
diff --git a/pythonProject/diffusers-main/tests/quantization/bnb/test_mixed_int8.py b/pythonProject/diffusers-main/tests/quantization/bnb/test_mixed_int8.py
new file mode 100644
index 0000000000000000000000000000000000000000..fde3966dec973c61aefb2da0680fbce8f9c607c8
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/quantization/bnb/test_mixed_int8.py
@@ -0,0 +1,863 @@
+# coding=utf-8
+# Copyright 2025 The HuggingFace Team Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import gc
+import tempfile
+import unittest
+
+import numpy as np
+import pytest
+from huggingface_hub import hf_hub_download
+from PIL import Image
+
+from diffusers import (
+    BitsAndBytesConfig,
+    DiffusionPipeline,
+    FluxControlPipeline,
+    FluxTransformer2DModel,
+    SanaTransformer2DModel,
+    SD3Transformer2DModel,
+    logging,
+)
+from diffusers.quantizers import PipelineQuantizationConfig
+from diffusers.utils import is_accelerate_version
+
+from ...testing_utils import (
+    CaptureLogger,
+    backend_empty_cache,
+    is_bitsandbytes_available,
+    is_torch_available,
+    is_transformers_available,
+    load_pt,
+    numpy_cosine_similarity_distance,
+    require_accelerate,
+    require_bitsandbytes_version_greater,
+    require_peft_backend,
+    require_peft_version_greater,
+    require_torch,
+    require_torch_accelerator,
+    require_torch_version_greater_equal,
+    require_transformers_version_greater,
+    slow,
+    torch_device,
+)
+from ..test_torch_compile_utils import QuantCompileTests
+
+
+def get_some_linear_layer(model):
+    if model.__class__.__name__ in ["SD3Transformer2DModel", "FluxTransformer2DModel"]:
+        return model.transformer_blocks[0].attn.to_q
+    else:
+        raise NotImplementedError("Don't know what layer to retrieve here.")
+
+
+if is_transformers_available():
+    from transformers import BitsAndBytesConfig as BnbConfig
+    from transformers import T5EncoderModel
+
+if is_torch_available():
+    import torch
+
+    from ..utils import LoRALayer, get_memory_consumption_stat
+
+
+if is_bitsandbytes_available():
+    import bitsandbytes as bnb
+
+    from diffusers.quantizers.bitsandbytes import replace_with_bnb_linear
+
+
+@require_bitsandbytes_version_greater("0.43.2")
+@require_accelerate
+@require_torch
+@require_torch_accelerator
+@slow
+class Base8bitTests(unittest.TestCase):
+    # We need to test on relatively large models (i.e. >1B parameters, otherwise the quantization may not work as
expected) + # Therefore here we use only SD3 to test our module + model_name = "stabilityai/stable-diffusion-3-medium-diffusers" + + # This was obtained on audace so the number might slightly change + expected_rel_difference = 1.94 + + expected_memory_saving_ratio = 0.7 + + prompt = "a beautiful sunset amidst the mountains." + num_inference_steps = 10 + seed = 0 + + @classmethod + def setUpClass(cls): + cls.is_deterministic_enabled = torch.are_deterministic_algorithms_enabled() + if not cls.is_deterministic_enabled: + torch.use_deterministic_algorithms(True) + + @classmethod + def tearDownClass(cls): + if not cls.is_deterministic_enabled: + torch.use_deterministic_algorithms(False) + + def get_dummy_inputs(self): + prompt_embeds = load_pt( + "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/prompt_embeds.pt", + map_location="cpu", + ) + pooled_prompt_embeds = load_pt( + "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/pooled_prompt_embeds.pt", + map_location="cpu", + ) + latent_model_input = load_pt( + "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/latent_model_input.pt", + map_location="cpu", + ) + + input_dict_for_transformer = { + "hidden_states": latent_model_input, + "encoder_hidden_states": prompt_embeds, + "pooled_projections": pooled_prompt_embeds, + "timestep": torch.Tensor([1.0]), + "return_dict": False, + } + return input_dict_for_transformer + + +class BnB8bitBasicTests(Base8bitTests): + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + # Models + self.model_fp16 = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", torch_dtype=torch.float16 + ) + mixed_int8_config = BitsAndBytesConfig(load_in_8bit=True) + self.model_8bit = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=mixed_int8_config, device_map=torch_device + ) + + def tearDown(self): + if hasattr(self, "model_fp16"): + del self.model_fp16 + if hasattr(self, "model_8bit"): + del self.model_8bit + + gc.collect() + backend_empty_cache(torch_device) + + def test_quantization_num_parameters(self): + r""" + Test if the number of returned parameters is correct + """ + num_params_8bit = self.model_8bit.num_parameters() + num_params_fp16 = self.model_fp16.num_parameters() + + self.assertEqual(num_params_8bit, num_params_fp16) + + def test_quantization_config_json_serialization(self): + r""" + A simple test to check if the quantization config is correctly serialized and deserialized + """ + config = self.model_8bit.config + + self.assertTrue("quantization_config" in config) + + _ = config["quantization_config"].to_dict() + _ = config["quantization_config"].to_diff_dict() + + _ = config["quantization_config"].to_json_string() + + def test_memory_footprint(self): + r""" + A simple test to check if the model conversion has been done correctly by checking on the + memory footprint of the converted model and the class type of the linear layers of the converted models + """ + mem_fp16 = self.model_fp16.get_memory_footprint() + mem_8bit = self.model_8bit.get_memory_footprint() + + self.assertAlmostEqual(mem_fp16 / mem_8bit, self.expected_rel_difference, delta=1e-2) + linear = get_some_linear_layer(self.model_8bit) + self.assertTrue(linear.weight.__class__ == bnb.nn.Int8Params) + + def test_model_memory_usage(self): + # Delete to not let anything interfere. 
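+        # As in the 4-bit variant above, the fp16 baseline is measured and freed
+        # before the 8-bit model is loaded.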
+ del self.model_8bit, self.model_fp16 + + # Re-instantiate. + inputs = self.get_dummy_inputs() + inputs = { + k: v.to(device=torch_device, dtype=torch.float16) for k, v in inputs.items() if not isinstance(v, bool) + } + model_fp16 = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", torch_dtype=torch.float16 + ).to(torch_device) + unquantized_model_memory = get_memory_consumption_stat(model_fp16, inputs) + del model_fp16 + + config = BitsAndBytesConfig(load_in_8bit=True) + model_8bit = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=config, torch_dtype=torch.float16 + ) + quantized_model_memory = get_memory_consumption_stat(model_8bit, inputs) + assert unquantized_model_memory / quantized_model_memory >= self.expected_memory_saving_ratio + + def test_original_dtype(self): + r""" + A simple test to check if the model successfully stores the original dtype + """ + self.assertTrue("_pre_quantization_dtype" in self.model_8bit.config) + self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config) + self.assertTrue(self.model_8bit.config["_pre_quantization_dtype"] == torch.float16) + + def test_keep_modules_in_fp32(self): + r""" + A simple test to check if the modules under `_keep_in_fp32_modules` are kept in fp32. + Also ensures that inference works. + """ + fp32_modules = SD3Transformer2DModel._keep_in_fp32_modules + SD3Transformer2DModel._keep_in_fp32_modules = ["proj_out"] + + mixed_int8_config = BitsAndBytesConfig(load_in_8bit=True) + model = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=mixed_int8_config, device_map=torch_device + ) + + for name, module in model.named_modules(): + if isinstance(module, torch.nn.Linear): + if name in model._keep_in_fp32_modules: + self.assertTrue(module.weight.dtype == torch.float32) + else: + # 8-bit parameters are packed in int8 variables + self.assertTrue(module.weight.dtype == torch.int8) + + # test if inference works.
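+        # Note: modules listed in `_keep_in_fp32_modules` ("proj_out" here) are skipped
+        # by the bnb quantizer and stay plain fp32 `torch.nn.Linear` layers, while every
+        # other Linear gets its weight packed into int8. The autocast context below lets
+        # the fp32 layer interoperate with the fp16/int8 remainder of the model for a
+        # quick smoke-test forward pass.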
+ with torch.no_grad(), torch.autocast(model.device.type, dtype=torch.float16): + input_dict_for_transformer = self.get_dummy_inputs() + model_inputs = { + k: v.to(device=torch_device) for k, v in input_dict_for_transformer.items() if not isinstance(v, bool) + } + model_inputs.update({k: v for k, v in input_dict_for_transformer.items() if k not in model_inputs}) + _ = model(**model_inputs) + + SD3Transformer2DModel._keep_in_fp32_modules = fp32_modules + + def test_linear_are_8bit(self): + r""" + A simple test to check if the model conversion has been done correctly by checking on the + memory footprint of the converted model and the class type of the linear layers of the converted models + """ + self.model_fp16.get_memory_footprint() + self.model_8bit.get_memory_footprint() + + for name, module in self.model_8bit.named_modules(): + if isinstance(module, torch.nn.Linear): + if name not in ["proj_out"]: + # 8-bit parameters are packed in int8 variables + self.assertTrue(module.weight.dtype == torch.int8) + + def test_llm_skip(self): + r""" + A simple test to check if `llm_int8_skip_modules` works as expected + """ + config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_skip_modules=["proj_out"]) + model_8bit = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=config, device_map=torch_device + ) + linear = get_some_linear_layer(model_8bit) + self.assertTrue(linear.weight.dtype == torch.int8) + self.assertTrue(isinstance(linear, bnb.nn.Linear8bitLt)) + + self.assertTrue(isinstance(model_8bit.proj_out, torch.nn.Linear)) + self.assertTrue(model_8bit.proj_out.weight.dtype != torch.int8) + + def test_config_from_pretrained(self): + transformer_8bit = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/flux.1-dev-int8-pkg", subfolder="transformer" + ) + linear = get_some_linear_layer(transformer_8bit) + self.assertTrue(linear.weight.__class__ == bnb.nn.Int8Params) + self.assertTrue(hasattr(linear.weight, "SCB")) + + def test_device_and_dtype_assignment(self): + r""" + Test whether trying to cast (or assigning a device to) a model after converting it in 8-bit will throw an error. + Also checks that other models are cast correctly.
+ """ + with self.assertRaises(ValueError): + # Tries with `str` + self.model_8bit.to("cpu") + + with self.assertRaises(ValueError): + # Tries with a `dtype`` + self.model_8bit.to(torch.float16) + + with self.assertRaises(ValueError): + # Tries with a `device` + self.model_8bit.to(torch.device(f"{torch_device}:0")) + + with self.assertRaises(ValueError): + # Tries with a `device` + self.model_8bit.float() + + with self.assertRaises(ValueError): + # Tries with a `device` + self.model_8bit.half() + + # Test if we did not break anything + self.model_fp16 = self.model_fp16.to(dtype=torch.float32, device=torch_device) + input_dict_for_transformer = self.get_dummy_inputs() + model_inputs = { + k: v.to(dtype=torch.float32, device=torch_device) + for k, v in input_dict_for_transformer.items() + if not isinstance(v, bool) + } + model_inputs.update({k: v for k, v in input_dict_for_transformer.items() if k not in model_inputs}) + with torch.no_grad(): + _ = self.model_fp16(**model_inputs) + + # Check this does not throw an error + _ = self.model_fp16.to("cpu") + + # Check this does not throw an error + _ = self.model_fp16.half() + + # Check this does not throw an error + _ = self.model_fp16.float() + + # Check that this does not throw an error + _ = self.model_fp16.to(torch_device) + + def test_bnb_8bit_logs_warning_for_no_quantization(self): + model_with_no_linear = torch.nn.Sequential(torch.nn.Conv2d(4, 4, 3), torch.nn.ReLU()) + quantization_config = BitsAndBytesConfig(load_in_8bit=True) + logger = logging.get_logger("diffusers.quantizers.bitsandbytes.utils") + logger.setLevel(30) + with CaptureLogger(logger) as cap_logger: + _ = replace_with_bnb_linear(model_with_no_linear, quantization_config=quantization_config) + assert ( + "You are loading your model in 8bit or 4bit but no linear modules were found in your model." + in cap_logger.out + ) + + +class Bnb8bitDeviceTests(Base8bitTests): + def setUp(self) -> None: + gc.collect() + backend_empty_cache(torch_device) + + mixed_int8_config = BitsAndBytesConfig(load_in_8bit=True) + self.model_8bit = SanaTransformer2DModel.from_pretrained( + "Efficient-Large-Model/Sana_1600M_4Kpx_BF16_diffusers", + subfolder="transformer", + quantization_config=mixed_int8_config, + device_map=torch_device, + ) + + def tearDown(self): + del self.model_8bit + + gc.collect() + backend_empty_cache(torch_device) + + def test_buffers_device_assignment(self): + for buffer_name, buffer in self.model_8bit.named_buffers(): + self.assertEqual( + buffer.device.type, + torch.device(torch_device).type, + f"Expected device {torch_device} for {buffer_name} got {buffer.device}.", + ) + + +class BnB8bitTrainingTests(Base8bitTests): + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + mixed_int8_config = BitsAndBytesConfig(load_in_8bit=True) + self.model_8bit = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=mixed_int8_config, device_map=torch_device + ) + + def test_training(self): + # Step 1: freeze all parameters + for param in self.model_8bit.parameters(): + param.requires_grad = False # freeze the model - train adapters later + if param.ndim == 1: + # cast the small parameters (e.g. 
layernorm) to fp32 for stability + param.data = param.data.to(torch.float32) + + # Step 2: add adapters + for _, module in self.model_8bit.named_modules(): + if "Attention" in repr(type(module)): + module.to_k = LoRALayer(module.to_k, rank=4) + module.to_q = LoRALayer(module.to_q, rank=4) + module.to_v = LoRALayer(module.to_v, rank=4) + + # Step 3: dummy batch + input_dict_for_transformer = self.get_dummy_inputs() + model_inputs = { + k: v.to(device=torch_device) for k, v in input_dict_for_transformer.items() if not isinstance(v, bool) + } + model_inputs.update({k: v for k, v in input_dict_for_transformer.items() if k not in model_inputs}) + + # Step 4: Check if the gradient is not None + with torch.amp.autocast(torch_device, dtype=torch.float16): + out = self.model_8bit(**model_inputs)[0] + out.norm().backward() + + for module in self.model_8bit.modules(): + if isinstance(module, LoRALayer): + self.assertTrue(module.adapter[1].weight.grad is not None) + self.assertTrue(module.adapter[1].weight.grad.norm().item() > 0) + + +@require_transformers_version_greater("4.44.0") +class SlowBnb8bitTests(Base8bitTests): + def setUp(self) -> None: + gc.collect() + backend_empty_cache(torch_device) + + mixed_int8_config = BitsAndBytesConfig(load_in_8bit=True) + model_8bit = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=mixed_int8_config, device_map=torch_device + ) + self.pipeline_8bit = DiffusionPipeline.from_pretrained( + self.model_name, transformer=model_8bit, torch_dtype=torch.float16 + ) + self.pipeline_8bit.enable_model_cpu_offload() + + def tearDown(self): + del self.pipeline_8bit + + gc.collect() + backend_empty_cache(torch_device) + + def test_quality(self): + output = self.pipeline_8bit( + prompt=self.prompt, + num_inference_steps=self.num_inference_steps, + generator=torch.manual_seed(self.seed), + output_type="np", + ).images + out_slice = output[0, -3:, -3:, -1].flatten() + expected_slice = np.array([0.0674, 0.0623, 0.0364, 0.0632, 0.0671, 0.0430, 0.0317, 0.0493, 0.0583]) + + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-2) + + def test_model_cpu_offload_raises_warning(self): + model_8bit = SD3Transformer2DModel.from_pretrained( + self.model_name, + subfolder="transformer", + quantization_config=BitsAndBytesConfig(load_in_8bit=True), + device_map=torch_device, + ) + pipeline_8bit = DiffusionPipeline.from_pretrained( + self.model_name, transformer=model_8bit, torch_dtype=torch.float16 + ) + logger = logging.get_logger("diffusers.pipelines.pipeline_utils") + logger.setLevel(30) + + with CaptureLogger(logger) as cap_logger: + pipeline_8bit.enable_model_cpu_offload() + + assert "has been loaded in `bitsandbytes` 8bit" in cap_logger.out + + def test_moving_to_cpu_throws_warning(self): + model_8bit = SD3Transformer2DModel.from_pretrained( + self.model_name, + subfolder="transformer", + quantization_config=BitsAndBytesConfig(load_in_8bit=True), + device_map=torch_device, + ) + logger = logging.get_logger("diffusers.pipelines.pipeline_utils") + logger.setLevel(30) + + with CaptureLogger(logger) as cap_logger: + # Because `model.dtype` will return torch.float16 as SD3 transformer has + # a conv layer as the first layer. 
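+            # Note: because the reported dtype is fp16, the `.to("cpu")` call below is
+            # expected to hit the fp16-on-CPU warning path of `DiffusionPipeline.to`,
+            # which is what the log-capture assertion checks.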
+ _ = DiffusionPipeline.from_pretrained( + self.model_name, transformer=model_8bit, torch_dtype=torch.float16 + ).to("cpu") + + assert "Pipelines loaded with `dtype=torch.float16`" in cap_logger.out + + def test_generate_quality_dequantize(self): + r""" + Test that loading the model and dequantizing it produces correct results. + """ + self.pipeline_8bit.transformer.dequantize() + output = self.pipeline_8bit( + prompt=self.prompt, + num_inference_steps=self.num_inference_steps, + generator=torch.manual_seed(self.seed), + output_type="np", + ).images + + out_slice = output[0, -3:, -3:, -1].flatten() + expected_slice = np.array([0.0266, 0.0264, 0.0271, 0.0110, 0.0310, 0.0098, 0.0078, 0.0256, 0.0208]) + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-2) + + # 8bit models cannot be offloaded to CPU. + self.assertTrue(self.pipeline_8bit.transformer.device.type == torch_device) + # calling it again shouldn't be a problem + _ = self.pipeline_8bit( + prompt=self.prompt, + num_inference_steps=2, + generator=torch.manual_seed(self.seed), + output_type="np", + ).images + + @pytest.mark.xfail( + condition=is_accelerate_version("<=", "1.1.1"), + reason="Test will pass after https://github.com/huggingface/accelerate/pull/3223 is in a release.", + strict=True, + ) + def test_pipeline_cuda_placement_works_with_mixed_int8(self): + transformer_8bit_config = BitsAndBytesConfig(load_in_8bit=True) + transformer_8bit = SD3Transformer2DModel.from_pretrained( + self.model_name, + subfolder="transformer", + quantization_config=transformer_8bit_config, + torch_dtype=torch.float16, + device_map=torch_device, + ) + text_encoder_3_8bit_config = BnbConfig(load_in_8bit=True) + text_encoder_3_8bit = T5EncoderModel.from_pretrained( + self.model_name, + subfolder="text_encoder_3", + quantization_config=text_encoder_3_8bit_config, + torch_dtype=torch.float16, + device_map=torch_device, + ) + + # CUDA device placement works. + device = torch_device if torch_device != "rocm" else "cuda" + pipeline_8bit = DiffusionPipeline.from_pretrained( + self.model_name, + transformer=transformer_8bit, + text_encoder_3=text_encoder_3_8bit, + torch_dtype=torch.float16, + ).to(device) + + # Check if inference works. + _ = pipeline_8bit(self.prompt, max_sequence_length=20, num_inference_steps=2) + + del pipeline_8bit + + def test_device_map(self): + """ + Test if the quantized model is working properly with "auto". + cpu/disk offloading doesn't work with bnb.
+ """ + + def get_dummy_tensor_inputs(device=None, seed: int = 0): + batch_size = 1 + num_latent_channels = 4 + num_image_channels = 3 + height = width = 4 + sequence_length = 48 + embedding_dim = 32 + + torch.manual_seed(seed) + hidden_states = torch.randn((batch_size, height * width, num_latent_channels)).to( + device, dtype=torch.bfloat16 + ) + + torch.manual_seed(seed) + encoder_hidden_states = torch.randn((batch_size, sequence_length, embedding_dim)).to( + device, dtype=torch.bfloat16 + ) + + torch.manual_seed(seed) + pooled_prompt_embeds = torch.randn((batch_size, embedding_dim)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + text_ids = torch.randn((sequence_length, num_image_channels)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + image_ids = torch.randn((height * width, num_image_channels)).to(device, dtype=torch.bfloat16) + + timestep = torch.tensor([1.0]).to(device, dtype=torch.bfloat16).expand(batch_size) + + return { + "hidden_states": hidden_states, + "encoder_hidden_states": encoder_hidden_states, + "pooled_projections": pooled_prompt_embeds, + "txt_ids": text_ids, + "img_ids": image_ids, + "timestep": timestep, + } + + inputs = get_dummy_tensor_inputs(torch_device) + expected_slice = np.array( + [ + 0.33789062, + -0.04736328, + -0.00256348, + -0.23144531, + -0.49804688, + 0.4375, + -0.15429688, + -0.65234375, + 0.44335938, + ] + ) + + # non sharded + quantization_config = BitsAndBytesConfig(load_in_8bit=True) + quantized_model = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/tiny-flux-pipe", + subfolder="transformer", + quantization_config=quantization_config, + device_map="auto", + torch_dtype=torch.bfloat16, + ) + + weight = quantized_model.transformer_blocks[0].ff.net[2].weight + self.assertTrue(isinstance(weight, bnb.nn.modules.Int8Params)) + + output = quantized_model(**inputs)[0] + output_slice = output.flatten()[-9:].detach().float().cpu().numpy() + self.assertTrue(numpy_cosine_similarity_distance(output_slice, expected_slice) < 1e-3) + + # sharded + quantization_config = BitsAndBytesConfig(load_in_8bit=True) + quantized_model = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/tiny-flux-sharded", + subfolder="transformer", + quantization_config=quantization_config, + device_map="auto", + torch_dtype=torch.bfloat16, + ) + + weight = quantized_model.transformer_blocks[0].ff.net[2].weight + self.assertTrue(isinstance(weight, bnb.nn.modules.Int8Params)) + output = quantized_model(**inputs)[0] + output_slice = output.flatten()[-9:].detach().float().cpu().numpy() + + self.assertTrue(numpy_cosine_similarity_distance(output_slice, expected_slice) < 1e-3) + + +@require_transformers_version_greater("4.44.0") +class SlowBnb8bitFluxTests(Base8bitTests): + def setUp(self) -> None: + gc.collect() + backend_empty_cache(torch_device) + + model_id = "hf-internal-testing/flux.1-dev-int8-pkg" + t5_8bit = T5EncoderModel.from_pretrained(model_id, subfolder="text_encoder_2") + transformer_8bit = FluxTransformer2DModel.from_pretrained(model_id, subfolder="transformer") + self.pipeline_8bit = DiffusionPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", + text_encoder_2=t5_8bit, + transformer=transformer_8bit, + torch_dtype=torch.float16, + ) + self.pipeline_8bit.enable_model_cpu_offload() + + def tearDown(self): + del self.pipeline_8bit + + gc.collect() + backend_empty_cache(torch_device) + + def test_quality(self): + # keep the resolution and max tokens to a lower number for faster execution. 
+ output = self.pipeline_8bit( + prompt=self.prompt, + num_inference_steps=self.num_inference_steps, + generator=torch.manual_seed(self.seed), + height=256, + width=256, + max_sequence_length=64, + output_type="np", + ).images + out_slice = output[0, -3:, -3:, -1].flatten() + expected_slice = np.array([0.0574, 0.0554, 0.0581, 0.0686, 0.0676, 0.0759, 0.0757, 0.0803, 0.0930]) + + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-3) + + @require_peft_version_greater("0.14.0") + def test_lora_loading(self): + self.pipeline_8bit.load_lora_weights( + hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"), adapter_name="hyper-sd" + ) + self.pipeline_8bit.set_adapters("hyper-sd", adapter_weights=0.125) + + output = self.pipeline_8bit( + prompt=self.prompt, + height=256, + width=256, + max_sequence_length=64, + output_type="np", + num_inference_steps=8, + generator=torch.manual_seed(42), + ).images + out_slice = output[0, -3:, -3:, -1].flatten() + + expected_slice = np.array([0.3916, 0.3916, 0.3887, 0.4243, 0.4155, 0.4233, 0.4570, 0.4531, 0.4248]) + + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-3) + + +@require_transformers_version_greater("4.44.0") +@require_peft_backend +class SlowBnb8bitFluxControlWithLoraTests(Base8bitTests): + def setUp(self) -> None: + gc.collect() + backend_empty_cache(torch_device) + + self.pipeline_8bit = FluxControlPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", + quantization_config=PipelineQuantizationConfig( + quant_backend="bitsandbytes_8bit", + quant_kwargs={"load_in_8bit": True}, + components_to_quantize=["transformer", "text_encoder_2"], + ), + torch_dtype=torch.float16, + ) + self.pipeline_8bit.enable_model_cpu_offload() + + def tearDown(self): + del self.pipeline_8bit + + gc.collect() + backend_empty_cache(torch_device) + + def test_lora_loading(self): + self.pipeline_8bit.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora") + + output = self.pipeline_8bit( + prompt=self.prompt, + control_image=Image.new(mode="RGB", size=(256, 256)), + height=256, + width=256, + max_sequence_length=64, + output_type="np", + num_inference_steps=8, + generator=torch.Generator().manual_seed(42), + ).images + out_slice = output[0, -3:, -3:, -1].flatten() + expected_slice = np.array([0.2029, 0.2136, 0.2268, 0.1921, 0.1997, 0.2185, 0.2021, 0.2183, 0.2292]) + + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-3, msg=f"{out_slice=} != {expected_slice=}") + + +@slow +class BaseBnb8bitSerializationTests(Base8bitTests): + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + quantization_config = BitsAndBytesConfig( + load_in_8bit=True, + ) + self.model_0 = SD3Transformer2DModel.from_pretrained( + self.model_name, subfolder="transformer", quantization_config=quantization_config, device_map=torch_device + ) + + def tearDown(self): + del self.model_0 + + gc.collect() + backend_empty_cache(torch_device) + + def test_serialization(self): + r""" + Test whether it is possible to serialize a model in 8-bit. Uses most typical params as default.
+ """ + self.assertTrue("_pre_quantization_dtype" in self.model_0.config) + with tempfile.TemporaryDirectory() as tmpdirname: + self.model_0.save_pretrained(tmpdirname) + + config = SD3Transformer2DModel.load_config(tmpdirname) + self.assertTrue("quantization_config" in config) + self.assertTrue("_pre_quantization_dtype" not in config) + + model_1 = SD3Transformer2DModel.from_pretrained(tmpdirname) + + # checking quantized linear module weight + linear = get_some_linear_layer(model_1) + self.assertTrue(linear.weight.__class__ == bnb.nn.Int8Params) + self.assertTrue(hasattr(linear.weight, "SCB")) + + # checking memory footpring + self.assertAlmostEqual(self.model_0.get_memory_footprint() / model_1.get_memory_footprint(), 1, places=2) + + # Matching all parameters and their quant_state items: + d0 = dict(self.model_0.named_parameters()) + d1 = dict(model_1.named_parameters()) + self.assertTrue(d0.keys() == d1.keys()) + + # comparing forward() outputs + dummy_inputs = self.get_dummy_inputs() + inputs = {k: v.to(torch_device) for k, v in dummy_inputs.items() if isinstance(v, torch.Tensor)} + inputs.update({k: v for k, v in dummy_inputs.items() if k not in inputs}) + out_0 = self.model_0(**inputs)[0] + out_1 = model_1(**inputs)[0] + self.assertTrue(torch.equal(out_0, out_1)) + + def test_serialization_sharded(self): + with tempfile.TemporaryDirectory() as tmpdirname: + self.model_0.save_pretrained(tmpdirname, max_shard_size="200MB") + + config = SD3Transformer2DModel.load_config(tmpdirname) + self.assertTrue("quantization_config" in config) + self.assertTrue("_pre_quantization_dtype" not in config) + + model_1 = SD3Transformer2DModel.from_pretrained(tmpdirname) + + # checking quantized linear module weight + linear = get_some_linear_layer(model_1) + self.assertTrue(linear.weight.__class__ == bnb.nn.Int8Params) + self.assertTrue(hasattr(linear.weight, "SCB")) + + # comparing forward() outputs + dummy_inputs = self.get_dummy_inputs() + inputs = {k: v.to(torch_device) for k, v in dummy_inputs.items() if isinstance(v, torch.Tensor)} + inputs.update({k: v for k, v in dummy_inputs.items() if k not in inputs}) + out_0 = self.model_0(**inputs)[0] + out_1 = model_1(**inputs)[0] + self.assertTrue(torch.equal(out_0, out_1)) + + +@require_torch_version_greater_equal("2.6.0") +@require_bitsandbytes_version_greater("0.45.5") +class Bnb8BitCompileTests(QuantCompileTests, unittest.TestCase): + @property + def quantization_config(self): + return PipelineQuantizationConfig( + quant_backend="bitsandbytes_8bit", + quant_kwargs={"load_in_8bit": True}, + components_to_quantize=["transformer", "text_encoder_2"], + ) + + @pytest.mark.xfail( + reason="Test fails because of an offloading problem from Accelerate with confusion in hooks." + " Test passes without recompilation context manager. Refer to https://github.com/huggingface/diffusers/pull/12002/files#r2240462757 for details." 
+ ) + def test_torch_compile(self): + torch._dynamo.config.capture_dynamic_output_shape_ops = True + super()._test_torch_compile(torch_dtype=torch.float16) + + def test_torch_compile_with_cpu_offload(self): + super()._test_torch_compile_with_cpu_offload(torch_dtype=torch.float16) + + @pytest.mark.xfail(reason="Test fails because of an offloading problem from Accelerate with confusion in hooks.") + def test_torch_compile_with_group_offload_leaf(self): + super()._test_torch_compile_with_group_offload_leaf(torch_dtype=torch.float16, use_stream=True) diff --git a/pythonProject/diffusers-main/tests/quantization/gguf/__init__.py b/pythonProject/diffusers-main/tests/quantization/gguf/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/quantization/gguf/test_gguf.py b/pythonProject/diffusers-main/tests/quantization/gguf/test_gguf.py new file mode 100644 index 0000000000000000000000000000000000000000..38322459e761f5be2af0dba73b354a67a6a0c596 --- /dev/null +++ b/pythonProject/diffusers-main/tests/quantization/gguf/test_gguf.py @@ -0,0 +1,740 @@ +import gc +import unittest + +import numpy as np +import torch +import torch.nn as nn + +from diffusers import ( + AuraFlowPipeline, + AuraFlowTransformer2DModel, + DiffusionPipeline, + FluxControlPipeline, + FluxPipeline, + FluxTransformer2DModel, + GGUFQuantizationConfig, + HiDreamImageTransformer2DModel, + SD3Transformer2DModel, + StableDiffusion3Pipeline, + WanTransformer3DModel, + WanVACETransformer3DModel, +) +from diffusers.utils import load_image + +from ...testing_utils import ( + Expectations, + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + is_gguf_available, + nightly, + numpy_cosine_similarity_distance, + require_accelerate, + require_accelerator, + require_big_accelerator, + require_gguf_version_greater_or_equal, + require_kernels_version_greater_or_equal, + require_peft_backend, + require_torch_version_greater, + torch_device, +) +from ..test_torch_compile_utils import QuantCompileTests + + +if is_gguf_available(): + import gguf + + from diffusers.quantizers.gguf.utils import GGUFLinear, GGUFParameter + +enable_full_determinism() + + +@nightly +@require_accelerate +@require_accelerator +@require_gguf_version_greater_or_equal("0.10.0") +@require_kernels_version_greater_or_equal("0.9.0") +class GGUFCudaKernelsTests(unittest.TestCase): + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def test_cuda_kernels_vs_native(self): + if torch_device != "cuda": + self.skipTest("CUDA kernels test requires CUDA device") + + from diffusers.quantizers.gguf.utils import GGUFLinear, can_use_cuda_kernels + + if not can_use_cuda_kernels: + self.skipTest("CUDA kernels not available (compute capability < 7 or kernels not installed)") + + test_quant_types = ["Q4_0", "Q4_K"] + test_shape = (1, 64, 512) # batch, seq_len, hidden_dim + compute_dtype = torch.bfloat16 + + for quant_type in test_quant_types: + qtype = getattr(gguf.GGMLQuantizationType, quant_type) + in_features, out_features = 512, 512 + + torch.manual_seed(42) + float_weight = torch.randn(out_features, in_features, dtype=torch.float32) + quantized_data = gguf.quants.quantize(float_weight.numpy(), qtype) + weight_data = torch.from_numpy(quantized_data).to(device=torch_device) + weight = GGUFParameter(weight_data, 
quant_type=qtype) + + x = torch.randn(test_shape, dtype=compute_dtype, device=torch_device) + + linear = GGUFLinear(in_features, out_features, bias=True, compute_dtype=compute_dtype) + linear.weight = weight + linear.bias = nn.Parameter(torch.randn(out_features, dtype=compute_dtype)) + linear = linear.to(torch_device) + + with torch.no_grad(): + output_native = linear.forward_native(x) + output_cuda = linear.forward_cuda(x) + + assert torch.allclose(output_native, output_cuda, rtol=1e-2), ( + f"GGUF CUDA Kernel Output is different from Native Output for {quant_type}" + ) + + +@nightly +@require_big_accelerator +@require_accelerate +@require_gguf_version_greater_or_equal("0.10.0") +class GGUFSingleFileTesterMixin: + ckpt_path = None + model_cls = None + torch_dtype = torch.bfloat16 + expected_memory_use_in_gb = 5 + + def test_gguf_parameters(self): + quant_storage_type = torch.uint8 + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + model = self.model_cls.from_single_file(self.ckpt_path, quantization_config=quantization_config) + + for param_name, param in model.named_parameters(): + if isinstance(param, GGUFParameter): + assert hasattr(param, "quant_type") + assert param.dtype == quant_storage_type + + def test_gguf_linear_layers(self): + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + model = self.model_cls.from_single_file(self.ckpt_path, quantization_config=quantization_config) + + for name, module in model.named_modules(): + if isinstance(module, torch.nn.Linear) and hasattr(module.weight, "quant_type"): + assert module.weight.dtype == torch.uint8 + if module.bias is not None: + assert module.bias.dtype == self.torch_dtype + + def test_gguf_memory_usage(self): + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + + model = self.model_cls.from_single_file( + self.ckpt_path, quantization_config=quantization_config, torch_dtype=self.torch_dtype + ) + model.to(torch_device) + assert (model.get_memory_footprint() / 1024**3) < self.expected_memory_use_in_gb + inputs = self.get_dummy_inputs() + + backend_reset_peak_memory_stats(torch_device) + backend_empty_cache(torch_device) + with torch.no_grad(): + model(**inputs) + max_memory = backend_max_memory_allocated(torch_device) + assert (max_memory / 1024**3) < self.expected_memory_use_in_gb + + def test_keep_modules_in_fp32(self): + r""" + A simple test to check if the modules under `_keep_in_fp32_modules` are kept in fp32. + Also ensures that inference works.
+ """ + _keep_in_fp32_modules = self.model_cls._keep_in_fp32_modules + self.model_cls._keep_in_fp32_modules = ["proj_out"] + + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + model = self.model_cls.from_single_file(self.ckpt_path, quantization_config=quantization_config) + + for name, module in model.named_modules(): + if isinstance(module, torch.nn.Linear): + if name in model._keep_in_fp32_modules: + assert module.weight.dtype == torch.float32 + self.model_cls._keep_in_fp32_modules = _keep_in_fp32_modules + + def test_dtype_assignment(self): + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + model = self.model_cls.from_single_file(self.ckpt_path, quantization_config=quantization_config) + + with self.assertRaises(ValueError): + # Tries with a `dtype` + model.to(torch.float16) + + with self.assertRaises(ValueError): + # Tries with a `device` and `dtype` + device_0 = f"{torch_device}:0" + model.to(device=device_0, dtype=torch.float16) + + with self.assertRaises(ValueError): + # Tries with a cast + model.float() + + with self.assertRaises(ValueError): + # Tries with a cast + model.half() + + # This should work + model.to(torch_device) + + def test_dequantize_model(self): + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + model = self.model_cls.from_single_file(self.ckpt_path, quantization_config=quantization_config) + model.dequantize() + + def _check_for_gguf_linear(model): + has_children = list(model.children()) + if not has_children: + return + + for name, module in model.named_children(): + if isinstance(module, nn.Linear): + assert not isinstance(module, GGUFLinear), f"{name} is still GGUFLinear" + assert not isinstance(module.weight, GGUFParameter), f"{name} weight is still GGUFParameter" + + for name, module in model.named_children(): + _check_for_gguf_linear(module) + + +class FluxGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): + ckpt_path = "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf" + diffusers_ckpt_path = "https://huggingface.co/sayakpaul/flux-diffusers-gguf/blob/main/model-Q4_0.gguf" + torch_dtype = torch.bfloat16 + model_cls = FluxTransformer2DModel + expected_memory_use_in_gb = 5 + + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 4096, 64), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states": torch.randn( + (1, 512, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "pooled_projections": torch.randn( + (1, 768), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + "img_ids": torch.randn((4096, 3), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "txt_ids": torch.randn((512, 3), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "guidance": torch.tensor([3.5]).to(torch_device, self.torch_dtype), + } + + def test_pipeline_inference(self): + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + transformer = self.model_cls.from_single_file( + self.ckpt_path, quantization_config=quantization_config, torch_dtype=self.torch_dtype + ) + pipe = 
FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=self.torch_dtype + ) + pipe.enable_model_cpu_offload() + + prompt = "a cat holding a sign that says hello" + output = pipe( + prompt=prompt, num_inference_steps=2, generator=torch.Generator("cpu").manual_seed(0), output_type="np" + ).images[0] + output_slice = output[:3, :3, :].flatten() + expected_slice = np.array( + [ + 0.47265625, + 0.43359375, + 0.359375, + 0.47070312, + 0.421875, + 0.34375, + 0.46875, + 0.421875, + 0.34765625, + 0.46484375, + 0.421875, + 0.34179688, + 0.47070312, + 0.42578125, + 0.34570312, + 0.46875, + 0.42578125, + 0.3515625, + 0.45507812, + 0.4140625, + 0.33984375, + 0.4609375, + 0.41796875, + 0.34375, + 0.45898438, + 0.41796875, + 0.34375, + ] + ) + max_diff = numpy_cosine_similarity_distance(expected_slice, output_slice) + assert max_diff < 1e-4 + + def test_loading_gguf_diffusers_format(self): + model = self.model_cls.from_single_file( + self.diffusers_ckpt_path, + subfolder="transformer", + quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16), + config="black-forest-labs/FLUX.1-dev", + ) + model.to(torch_device) + model(**self.get_dummy_inputs()) + + +class SD35LargeGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): + ckpt_path = "https://huggingface.co/city96/stable-diffusion-3.5-large-gguf/blob/main/sd3.5_large-Q4_0.gguf" + torch_dtype = torch.bfloat16 + model_cls = SD3Transformer2DModel + expected_memory_use_in_gb = 5 + + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 16, 64, 64), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states": torch.randn( + (1, 512, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "pooled_projections": torch.randn( + (1, 2048), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + } + + def test_pipeline_inference(self): + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + transformer = self.model_cls.from_single_file( + self.ckpt_path, quantization_config=quantization_config, torch_dtype=self.torch_dtype + ) + pipe = StableDiffusion3Pipeline.from_pretrained( + "stabilityai/stable-diffusion-3.5-large", transformer=transformer, torch_dtype=self.torch_dtype + ) + pipe.enable_model_cpu_offload() + + prompt = "a cat holding a sign that says hello" + output = pipe( + prompt=prompt, + num_inference_steps=2, + generator=torch.Generator("cpu").manual_seed(0), + output_type="np", + ).images[0] + output_slice = output[:3, :3, :].flatten() + expected_slices = Expectations( + { + ("xpu", 3): np.array( + [ + 0.1953125, + 0.3125, + 0.31445312, + 0.13085938, + 0.30664062, + 0.29296875, + 0.11523438, + 0.2890625, + 0.28320312, + 0.16601562, + 0.3046875, + 0.328125, + 0.140625, + 0.31640625, + 0.32421875, + 0.12304688, + 0.3046875, + 0.3046875, + 0.17578125, + 0.3359375, + 0.3203125, + 0.16601562, + 0.34375, + 0.31640625, + 0.15429688, + 0.328125, + 0.31054688, + ] + ), + ("cuda", 7): np.array( + [ + 0.17578125, + 0.27539062, + 0.27734375, + 0.11914062, + 0.26953125, + 0.25390625, + 0.109375, + 0.25390625, + 0.25, + 0.15039062, + 0.26171875, + 0.28515625, + 0.13671875, + 0.27734375, + 0.28515625, + 
0.12109375, + 0.26757812, + 0.265625, + 0.16210938, + 0.29882812, + 0.28515625, + 0.15625, + 0.30664062, + 0.27734375, + 0.14648438, + 0.29296875, + 0.26953125, + ] + ), + } + ) + expected_slice = expected_slices.get_expectation() + max_diff = numpy_cosine_similarity_distance(expected_slice, output_slice) + assert max_diff < 1e-4 + + +class SD35MediumGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): + ckpt_path = "https://huggingface.co/city96/stable-diffusion-3.5-medium-gguf/blob/main/sd3.5_medium-Q3_K_M.gguf" + torch_dtype = torch.bfloat16 + model_cls = SD3Transformer2DModel + expected_memory_use_in_gb = 2 + + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 16, 64, 64), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states": torch.randn( + (1, 512, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "pooled_projections": torch.randn( + (1, 2048), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + } + + def test_pipeline_inference(self): + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + transformer = self.model_cls.from_single_file( + self.ckpt_path, quantization_config=quantization_config, torch_dtype=self.torch_dtype + ) + pipe = StableDiffusion3Pipeline.from_pretrained( + "stabilityai/stable-diffusion-3.5-medium", transformer=transformer, torch_dtype=self.torch_dtype + ) + pipe.enable_model_cpu_offload() + + prompt = "a cat holding a sign that says hello" + output = pipe( + prompt=prompt, num_inference_steps=2, generator=torch.Generator("cpu").manual_seed(0), output_type="np" + ).images[0] + output_slice = output[:3, :3, :].flatten() + expected_slice = np.array( + [ + 0.625, + 0.6171875, + 0.609375, + 0.65625, + 0.65234375, + 0.640625, + 0.6484375, + 0.640625, + 0.625, + 0.6484375, + 0.63671875, + 0.6484375, + 0.66796875, + 0.65625, + 0.65234375, + 0.6640625, + 0.6484375, + 0.6328125, + 0.6640625, + 0.6484375, + 0.640625, + 0.67578125, + 0.66015625, + 0.62109375, + 0.671875, + 0.65625, + 0.62109375, + ] + ) + max_diff = numpy_cosine_similarity_distance(expected_slice, output_slice) + assert max_diff < 1e-4 + + +class AuraFlowGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): + ckpt_path = "https://huggingface.co/city96/AuraFlow-v0.3-gguf/blob/main/aura_flow_0.3-Q2_K.gguf" + torch_dtype = torch.bfloat16 + model_cls = AuraFlowTransformer2DModel + expected_memory_use_in_gb = 4 + + def setUp(self): + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 4, 64, 64), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states": torch.randn( + (1, 512, 2048), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + } + + def test_pipeline_inference(self): + quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + transformer = self.model_cls.from_single_file( + self.ckpt_path, quantization_config=quantization_config, 
torch_dtype=self.torch_dtype + ) + pipe = AuraFlowPipeline.from_pretrained( + "fal/AuraFlow-v0.3", transformer=transformer, torch_dtype=self.torch_dtype + ) + pipe.enable_model_cpu_offload() + + prompt = "a pony holding a sign that says hello" + output = pipe( + prompt=prompt, num_inference_steps=2, generator=torch.Generator("cpu").manual_seed(0), output_type="np" + ).images[0] + output_slice = output[:3, :3, :].flatten() + expected_slice = np.array( + [ + 0.46484375, + 0.546875, + 0.64453125, + 0.48242188, + 0.53515625, + 0.59765625, + 0.47070312, + 0.5078125, + 0.5703125, + 0.42773438, + 0.50390625, + 0.5703125, + 0.47070312, + 0.515625, + 0.57421875, + 0.45898438, + 0.48632812, + 0.53515625, + 0.4453125, + 0.5078125, + 0.56640625, + 0.47851562, + 0.5234375, + 0.57421875, + 0.48632812, + 0.5234375, + 0.56640625, + ] + ) + max_diff = numpy_cosine_similarity_distance(expected_slice, output_slice) + assert max_diff < 1e-4 + + +@require_peft_backend +@nightly +@require_big_accelerator +@require_accelerate +@require_gguf_version_greater_or_equal("0.10.0") +class FluxControlLoRAGGUFTests(unittest.TestCase): + def test_lora_loading(self): + ckpt_path = "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf" + transformer = FluxTransformer2DModel.from_single_file( + ckpt_path, + quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16), + torch_dtype=torch.bfloat16, + ) + pipe = FluxControlPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", + transformer=transformer, + torch_dtype=torch.bfloat16, + ).to(torch_device) + pipe.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora") + + prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts." 
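+        # Note: loading a Control LoRA on a GGUF-quantized transformer exercises the
+        # PEFT integration on top of `GGUFLinear` layers; the base weights stay packed
+        # in their GGUF blocks and are dequantized on the fly during forward, while the
+        # LoRA adapter weights remain ordinary bf16 tensors.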
+ control_image = load_image( + "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/control_image_robot_canny.png" + ) + + output = pipe( + prompt=prompt, + control_image=control_image, + height=256, + width=256, + num_inference_steps=10, + guidance_scale=30.0, + output_type="np", + generator=torch.manual_seed(0), + ).images + + out_slice = output[0, -3:, -3:, -1].flatten() + expected_slice = np.array([0.8047, 0.8359, 0.8711, 0.6875, 0.7070, 0.7383, 0.5469, 0.5820, 0.6641]) + + max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) + self.assertTrue(max_diff < 1e-3) + + +class HiDreamGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): + ckpt_path = "https://huggingface.co/city96/HiDream-I1-Dev-gguf/blob/main/hidream-i1-dev-Q2_K.gguf" + torch_dtype = torch.bfloat16 + model_cls = HiDreamImageTransformer2DModel + expected_memory_use_in_gb = 8 + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 16, 128, 128), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states_t5": torch.randn( + (1, 128, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "encoder_hidden_states_llama3": torch.randn( + (32, 1, 128, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "pooled_embeds": torch.randn( + (1, 2048), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "timesteps": torch.tensor([1]).to(torch_device, self.torch_dtype), + } + + +class WanGGUFTexttoVideoSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): + ckpt_path = "https://huggingface.co/city96/Wan2.1-T2V-14B-gguf/blob/main/wan2.1-t2v-14b-Q3_K_S.gguf" + torch_dtype = torch.bfloat16 + model_cls = WanTransformer3DModel + expected_memory_use_in_gb = 9 + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 16, 2, 64, 64), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states": torch.randn( + (1, 512, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + } + + +class WanGGUFImagetoVideoSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): + ckpt_path = "https://huggingface.co/city96/Wan2.1-I2V-14B-480P-gguf/blob/main/wan2.1-i2v-14b-480p-Q3_K_S.gguf" + torch_dtype = torch.bfloat16 + model_cls = WanTransformer3DModel + expected_memory_use_in_gb = 9 + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 36, 2, 64, 64), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states": torch.randn( + (1, 512, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "encoder_hidden_states_image": torch.randn( + (1, 257, 1280), generator=torch.Generator("cpu").manual_seed(0) + ).to(torch_device, self.torch_dtype), + "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + } + + +class WanVACEGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): + ckpt_path = "https://huggingface.co/QuantStack/Wan2.1_14B_VACE-GGUF/blob/main/Wan2.1_14B_VACE-Q3_K_S.gguf" + torch_dtype = torch.bfloat16 + model_cls = WanVACETransformer3DModel + expected_memory_use_in_gb = 9 + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 16, 2, 
64, 64), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states": torch.randn( + (1, 512, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "control_hidden_states": torch.randn( + (1, 96, 2, 64, 64), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "control_hidden_states_scale": torch.randn( + (8,), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + } + + +@require_torch_version_greater("2.7.1") +class GGUFCompileTests(QuantCompileTests, unittest.TestCase): + torch_dtype = torch.bfloat16 + gguf_ckpt = "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf" + + @property + def quantization_config(self): + return GGUFQuantizationConfig(compute_dtype=self.torch_dtype) + + def _init_pipeline(self, *args, **kwargs): + transformer = FluxTransformer2DModel.from_single_file( + self.gguf_ckpt, quantization_config=self.quantization_config, torch_dtype=self.torch_dtype + ) + pipe = DiffusionPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=self.torch_dtype + ) + return pipe diff --git a/pythonProject/diffusers-main/tests/quantization/modelopt/__init__.py b/pythonProject/diffusers-main/tests/quantization/modelopt/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/quantization/modelopt/test_modelopt.py b/pythonProject/diffusers-main/tests/quantization/modelopt/test_modelopt.py new file mode 100644 index 0000000000000000000000000000000000000000..6b0624a280834d4035e216847925428e64b5d8a4 --- /dev/null +++ b/pythonProject/diffusers-main/tests/quantization/modelopt/test_modelopt.py @@ -0,0 +1,306 @@ +import gc +import tempfile +import unittest + +from diffusers import NVIDIAModelOptConfig, SD3Transformer2DModel, StableDiffusion3Pipeline +from diffusers.utils import is_nvidia_modelopt_available, is_torch_available +from diffusers.utils.testing_utils import ( + backend_empty_cache, + backend_reset_peak_memory_stats, + enable_full_determinism, + nightly, + numpy_cosine_similarity_distance, + require_accelerate, + require_big_accelerator, + require_modelopt_version_greater_or_equal, + require_torch_cuda_compatibility, + torch_device, +) + + +if is_nvidia_modelopt_available(): + import modelopt.torch.quantization as mtq + +if is_torch_available(): + import torch + + from ..utils import LoRALayer, get_memory_consumption_stat + +enable_full_determinism() + + +@nightly +@require_big_accelerator +@require_accelerate +@require_modelopt_version_greater_or_equal("0.33.1") +class ModelOptBaseTesterMixin: + model_id = "hf-internal-testing/tiny-sd3-pipe" + model_cls = SD3Transformer2DModel + pipeline_cls = StableDiffusion3Pipeline + torch_dtype = torch.bfloat16 + expected_memory_reduction = 0.0 + keep_in_fp32_module = "" + modules_to_not_convert = "" + _test_torch_compile = False + + def setUp(self): + backend_reset_peak_memory_stats(torch_device) + backend_empty_cache(torch_device) + gc.collect() + + def tearDown(self): + backend_reset_peak_memory_stats(torch_device) + backend_empty_cache(torch_device) + gc.collect() + + def get_dummy_init_kwargs(self): + return {"quant_type": "FP8"} + + def get_dummy_model_init_kwargs(self): + return { + "pretrained_model_name_or_path": 
self.model_id, + "torch_dtype": self.torch_dtype, + "quantization_config": NVIDIAModelOptConfig(**self.get_dummy_init_kwargs()), + "subfolder": "transformer", + } + + def test_modelopt_layers(self): + model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs()) + for name, module in model.named_modules(): + if isinstance(module, torch.nn.Linear): + assert mtq.utils.is_quantized(module) + + def test_modelopt_memory_usage(self): + inputs = self.get_dummy_inputs() + inputs = { + k: v.to(device=torch_device, dtype=torch.bfloat16) for k, v in inputs.items() if not isinstance(v, bool) + } + + unquantized_model = self.model_cls.from_pretrained( + self.model_id, torch_dtype=self.torch_dtype, subfolder="transformer" + ) + unquantized_model.to(torch_device) + unquantized_model_memory = get_memory_consumption_stat(unquantized_model, inputs) + + quantized_model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs()) + quantized_model.to(torch_device) + quantized_model_memory = get_memory_consumption_stat(quantized_model, inputs) + + assert unquantized_model_memory / quantized_model_memory >= self.expected_memory_reduction + + def test_keep_modules_in_fp32(self): + _keep_in_fp32_modules = self.model_cls._keep_in_fp32_modules + self.model_cls._keep_in_fp32_modules = self.keep_in_fp32_module + + model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs()) + model.to(torch_device) + + for name, module in model.named_modules(): + if isinstance(module, torch.nn.Linear): + if name in model._keep_in_fp32_modules: + assert module.weight.dtype == torch.float32 + self.model_cls._keep_in_fp32_modules = _keep_in_fp32_modules + + def test_modules_to_not_convert(self): + init_kwargs = self.get_dummy_model_init_kwargs() + quantization_config_kwargs = self.get_dummy_init_kwargs() + quantization_config_kwargs.update({"modules_to_not_convert": self.modules_to_not_convert}) + quantization_config = NVIDIAModelOptConfig(**quantization_config_kwargs) + init_kwargs.update({"quantization_config": quantization_config}) + + model = self.model_cls.from_pretrained(**init_kwargs) + model.to(torch_device) + + for name, module in model.named_modules(): + if name in self.modules_to_not_convert: + assert not mtq.utils.is_quantized(module) + + def test_dtype_assignment(self): + model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs()) + + with self.assertRaises(ValueError): + model.to(torch.float16) + + with self.assertRaises(ValueError): + device_0 = f"{torch_device}:0" + model.to(device=device_0, dtype=torch.float16) + + with self.assertRaises(ValueError): + model.float() + + with self.assertRaises(ValueError): + model.half() + + model.to(torch_device) + + def test_serialization(self): + model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs()) + inputs = self.get_dummy_inputs() + + model.to(torch_device) + with torch.no_grad(): + model_output = model(**inputs) + + with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir) + saved_model = self.model_cls.from_pretrained( + tmp_dir, + torch_dtype=torch.bfloat16, + ) + + saved_model.to(torch_device) + with torch.no_grad(): + saved_model_output = saved_model(**inputs) + + assert torch.allclose(model_output.sample, saved_model_output.sample, rtol=1e-5, atol=1e-5) + + def test_torch_compile(self): + if not self._test_torch_compile: + return + + model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs()) + compiled_model = torch.compile(model, mode="max-autotune", 
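+        # Note: fullgraph=True compiles the model as a single FX graph (any graph break
+        # becomes an error) and dynamic=False pins the input shapes, so an untraceable
+        # modelopt fake-quant op fails this test loudly instead of silently falling
+        # back to eager execution.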
fullgraph=True, dynamic=False) + + model.to(torch_device) + with torch.no_grad(): + model_output = model(**self.get_dummy_inputs()).sample + + compiled_model.to(torch_device) + with torch.no_grad(): + compiled_model_output = compiled_model(**self.get_dummy_inputs()).sample + + model_output = model_output.detach().float().cpu().numpy() + compiled_model_output = compiled_model_output.detach().float().cpu().numpy() + + max_diff = numpy_cosine_similarity_distance(model_output.flatten(), compiled_model_output.flatten()) + assert max_diff < 1e-3 + + def test_device_map_error(self): + with self.assertRaises(ValueError): + _ = self.model_cls.from_pretrained( + **self.get_dummy_model_init_kwargs(), + device_map={0: "8GB", "cpu": "16GB"}, + ) + + def get_dummy_inputs(self): + batch_size = 1 + seq_len = 16 + height = width = 32 + num_latent_channels = 4 + caption_channels = 8 + + torch.manual_seed(0) + hidden_states = torch.randn((batch_size, num_latent_channels, height, width)).to( + torch_device, dtype=torch.bfloat16 + ) + encoder_hidden_states = torch.randn((batch_size, seq_len, caption_channels)).to( + torch_device, dtype=torch.bfloat16 + ) + timestep = torch.tensor([1.0]).to(torch_device, dtype=torch.bfloat16).expand(batch_size) + + return { + "hidden_states": hidden_states, + "encoder_hidden_states": encoder_hidden_states, + "timestep": timestep, + } + + def test_model_cpu_offload(self): + init_kwargs = self.get_dummy_init_kwargs() + transformer = self.model_cls.from_pretrained( + self.model_id, + quantization_config=NVIDIAModelOptConfig(**init_kwargs), + subfolder="transformer", + torch_dtype=torch.bfloat16, + ) + pipe = self.pipeline_cls.from_pretrained(self.model_id, transformer=transformer, torch_dtype=torch.bfloat16) + pipe.enable_model_cpu_offload(device=torch_device) + _ = pipe("a cat holding a sign that says hello", num_inference_steps=2) + + def test_training(self): + quantization_config = NVIDIAModelOptConfig(**self.get_dummy_init_kwargs()) + quantized_model = self.model_cls.from_pretrained( + self.model_id, + subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + ).to(torch_device) + + for param in quantized_model.parameters(): + param.requires_grad = False + if param.ndim == 1: + param.data = param.data.to(torch.float32) + + for _, module in quantized_model.named_modules(): + if hasattr(module, "to_q"): + module.to_q = LoRALayer(module.to_q, rank=4) + if hasattr(module, "to_k"): + module.to_k = LoRALayer(module.to_k, rank=4) + if hasattr(module, "to_v"): + module.to_v = LoRALayer(module.to_v, rank=4) + + with torch.amp.autocast(str(torch_device), dtype=torch.bfloat16): + inputs = self.get_dummy_inputs() + output = quantized_model(**inputs)[0] + output.norm().backward() + + for module in quantized_model.modules(): + if isinstance(module, LoRALayer): + self.assertTrue(module.adapter[1].weight.grad is not None) + + +class SanaTransformerFP8WeightsTest(ModelOptBaseTesterMixin, unittest.TestCase): + expected_memory_reduction = 0.6 + + def get_dummy_init_kwargs(self): + return {"quant_type": "FP8"} + + +class SanaTransformerINT8WeightsTest(ModelOptBaseTesterMixin, unittest.TestCase): + expected_memory_reduction = 0.6 + _test_torch_compile = True + + def get_dummy_init_kwargs(self): + return {"quant_type": "INT8"} + + +@require_torch_cuda_compatibility(8.0) +class SanaTransformerINT4WeightsTest(ModelOptBaseTesterMixin, unittest.TestCase): + expected_memory_reduction = 0.55 + + def get_dummy_init_kwargs(self): + return { + "quant_type": "INT4", + 
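+            # Note: `block_quantize` is assumed to set the INT4 group size (128 weights
+            # per scale) and `channel_quantize=-1` per-channel scaling over the last
+            # axis; `disable_conv_quantization` leaves conv layers, which tolerate INT4
+            # poorly, unquantized.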
"block_quantize": 128, + "channel_quantize": -1, + "disable_conv_quantization": True, + } + + +@require_torch_cuda_compatibility(8.0) +class SanaTransformerNF4WeightsTest(ModelOptBaseTesterMixin, unittest.TestCase): + expected_memory_reduction = 0.65 + + def get_dummy_init_kwargs(self): + return { + "quant_type": "NF4", + "block_quantize": 128, + "channel_quantize": -1, + "scale_block_quantize": 8, + "scale_channel_quantize": -1, + "modules_to_not_convert": ["conv"], + } + + +@require_torch_cuda_compatibility(8.0) +class SanaTransformerNVFP4WeightsTest(ModelOptBaseTesterMixin, unittest.TestCase): + expected_memory_reduction = 0.65 + + def get_dummy_init_kwargs(self): + return { + "quant_type": "NVFP4", + "block_quantize": 128, + "channel_quantize": -1, + "scale_block_quantize": 8, + "scale_channel_quantize": -1, + "modules_to_not_convert": ["conv"], + } diff --git a/pythonProject/diffusers-main/tests/quantization/quanto/__init__.py b/pythonProject/diffusers-main/tests/quantization/quanto/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/quantization/quanto/test_quanto.py b/pythonProject/diffusers-main/tests/quantization/quanto/test_quanto.py new file mode 100644 index 0000000000000000000000000000000000000000..e3463f136f942ef38b0a53bd27153535397a2f4b --- /dev/null +++ b/pythonProject/diffusers-main/tests/quantization/quanto/test_quanto.py @@ -0,0 +1,335 @@ +import gc +import tempfile +import unittest + +from diffusers import FluxPipeline, FluxTransformer2DModel, QuantoConfig +from diffusers.models.attention_processor import Attention +from diffusers.utils import is_optimum_quanto_available, is_torch_available + +from ...testing_utils import ( + backend_empty_cache, + backend_reset_peak_memory_stats, + enable_full_determinism, + nightly, + numpy_cosine_similarity_distance, + require_accelerate, + require_accelerator, + require_torch_cuda_compatibility, + torch_device, +) + + +if is_optimum_quanto_available(): + from optimum.quanto import QLinear + +if is_torch_available(): + import torch + + from ..utils import LoRALayer, get_memory_consumption_stat + +enable_full_determinism() + + +@nightly +@require_accelerator +@require_accelerate +class QuantoBaseTesterMixin: + model_id = None + pipeline_model_id = None + model_cls = None + torch_dtype = torch.bfloat16 + # the expected reduction in peak memory used compared to an unquantized model expressed as a percentage + expected_memory_reduction = 0.0 + keep_in_fp32_module = "" + modules_to_not_convert = "" + _test_torch_compile = False + + def setUp(self): + backend_reset_peak_memory_stats(torch_device) + backend_empty_cache(torch_device) + gc.collect() + + def tearDown(self): + backend_reset_peak_memory_stats(torch_device) + backend_empty_cache(torch_device) + gc.collect() + + def get_dummy_init_kwargs(self): + return {"weights_dtype": "float8"} + + def get_dummy_model_init_kwargs(self): + return { + "pretrained_model_name_or_path": self.model_id, + "torch_dtype": self.torch_dtype, + "quantization_config": QuantoConfig(**self.get_dummy_init_kwargs()), + } + + def test_quanto_layers(self): + model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs()) + for name, module in model.named_modules(): + if isinstance(module, torch.nn.Linear): + assert isinstance(module, QLinear) + + def test_quanto_memory_usage(self): + inputs = self.get_dummy_inputs() + inputs = { + k: v.to(device=torch_device, dtype=torch.bfloat16) for 
k, v in inputs.items() if not isinstance(v, bool)
+        }
+
+        unquantized_model = self.model_cls.from_pretrained(self.model_id, torch_dtype=self.torch_dtype)
+        unquantized_model.to(torch_device)
+        unquantized_model_memory = get_memory_consumption_stat(unquantized_model, inputs)
+
+        quantized_model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())
+        quantized_model.to(torch_device)
+        quantized_model_memory = get_memory_consumption_stat(quantized_model, inputs)
+
+        assert unquantized_model_memory / quantized_model_memory >= self.expected_memory_reduction
+
+    def test_keep_modules_in_fp32(self):
+        r"""
+        A simple test to check that the modules under `_keep_in_fp32_modules` are kept in fp32.
+        Also ensures that inference works.
+        """
+        _keep_in_fp32_modules = self.model_cls._keep_in_fp32_modules
+        self.model_cls._keep_in_fp32_modules = self.keep_in_fp32_module
+
+        model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())
+        model.to(torch_device)
+
+        for name, module in model.named_modules():
+            if isinstance(module, torch.nn.Linear):
+                if name in model._keep_in_fp32_modules:
+                    assert module.weight.dtype == torch.float32
+        self.model_cls._keep_in_fp32_modules = _keep_in_fp32_modules
+
+    def test_modules_to_not_convert(self):
+        init_kwargs = self.get_dummy_model_init_kwargs()
+
+        quantization_config_kwargs = self.get_dummy_init_kwargs()
+        quantization_config_kwargs.update({"modules_to_not_convert": self.modules_to_not_convert})
+        quantization_config = QuantoConfig(**quantization_config_kwargs)
+
+        init_kwargs.update({"quantization_config": quantization_config})
+
+        model = self.model_cls.from_pretrained(**init_kwargs)
+        model.to(torch_device)
+
+        for name, module in model.named_modules():
+            if name in self.modules_to_not_convert:
+                assert not isinstance(module, QLinear)
+
+    def test_dtype_assignment(self):
+        model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())
+
+        with self.assertRaises(ValueError):
+            # Tries with a `dtype`
+            model.to(torch.float16)
+
+        with self.assertRaises(ValueError):
+            # Tries with a `device` and `dtype`
+            device_0 = f"{torch_device}:0"
+            model.to(device=device_0, dtype=torch.float16)
+
+        with self.assertRaises(ValueError):
+            # Tries with a cast
+            model.float()
+
+        with self.assertRaises(ValueError):
+            # Tries with a cast
+            model.half()
+
+        # This should work
+        model.to(torch_device)
+
+    def test_serialization(self):
+        model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())
+        inputs = self.get_dummy_inputs()
+
+        model.to(torch_device)
+        with torch.no_grad():
+            model_output = model(**inputs)
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            model.save_pretrained(tmp_dir)
+            saved_model = self.model_cls.from_pretrained(
+                tmp_dir,
+                torch_dtype=torch.bfloat16,
+            )
+
+        saved_model.to(torch_device)
+        with torch.no_grad():
+            saved_model_output = saved_model(**inputs)
+
+        assert torch.allclose(model_output.sample, saved_model_output.sample, rtol=1e-5, atol=1e-5)
+
+    def test_torch_compile(self):
+        if not self._test_torch_compile:
+            return
+
+        model = self.model_cls.from_pretrained(**self.get_dummy_model_init_kwargs())
+        compiled_model = torch.compile(model, mode="max-autotune", fullgraph=True, dynamic=False)
+
+        model.to(torch_device)
+        with torch.no_grad():
+            model_output = model(**self.get_dummy_inputs()).sample
+
+        compiled_model.to(torch_device)
+        with torch.no_grad():
+            compiled_model_output = compiled_model(**self.get_dummy_inputs()).sample
+
+        model_output =
model_output.detach().float().cpu().numpy() + compiled_model_output = compiled_model_output.detach().float().cpu().numpy() + + max_diff = numpy_cosine_similarity_distance(model_output.flatten(), compiled_model_output.flatten()) + assert max_diff < 1e-3 + + def test_device_map_error(self): + with self.assertRaises(ValueError): + _ = self.model_cls.from_pretrained( + **self.get_dummy_model_init_kwargs(), device_map={0: "8GB", "cpu": "16GB"} + ) + + +class FluxTransformerQuantoMixin(QuantoBaseTesterMixin): + model_id = "hf-internal-testing/tiny-flux-transformer" + model_cls = FluxTransformer2DModel + pipeline_cls = FluxPipeline + torch_dtype = torch.bfloat16 + keep_in_fp32_module = "proj_out" + modules_to_not_convert = ["proj_out"] + _test_torch_compile = False + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 4096, 64), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states": torch.randn( + (1, 512, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "pooled_projections": torch.randn( + (1, 768), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + "img_ids": torch.randn((4096, 3), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "txt_ids": torch.randn((512, 3), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "guidance": torch.tensor([3.5]).to(torch_device, self.torch_dtype), + } + + def get_dummy_training_inputs(self, device=None, seed: int = 0): + batch_size = 1 + num_latent_channels = 4 + num_image_channels = 3 + height = width = 4 + sequence_length = 48 + embedding_dim = 32 + + torch.manual_seed(seed) + hidden_states = torch.randn((batch_size, height * width, num_latent_channels)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + encoder_hidden_states = torch.randn((batch_size, sequence_length, embedding_dim)).to( + device, dtype=torch.bfloat16 + ) + + torch.manual_seed(seed) + pooled_prompt_embeds = torch.randn((batch_size, embedding_dim)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + text_ids = torch.randn((sequence_length, num_image_channels)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + image_ids = torch.randn((height * width, num_image_channels)).to(device, dtype=torch.bfloat16) + + timestep = torch.tensor([1.0]).to(device, dtype=torch.bfloat16).expand(batch_size) + + return { + "hidden_states": hidden_states, + "encoder_hidden_states": encoder_hidden_states, + "pooled_projections": pooled_prompt_embeds, + "txt_ids": text_ids, + "img_ids": image_ids, + "timestep": timestep, + } + + def test_model_cpu_offload(self): + init_kwargs = self.get_dummy_init_kwargs() + transformer = self.model_cls.from_pretrained( + "hf-internal-testing/tiny-flux-pipe", + quantization_config=QuantoConfig(**init_kwargs), + subfolder="transformer", + torch_dtype=torch.bfloat16, + ) + pipe = self.pipeline_cls.from_pretrained( + "hf-internal-testing/tiny-flux-pipe", transformer=transformer, torch_dtype=torch.bfloat16 + ) + pipe.enable_model_cpu_offload(device=torch_device) + _ = pipe("a cat holding a sign that says hello", num_inference_steps=2) + + def test_training(self): + quantization_config = QuantoConfig(**self.get_dummy_init_kwargs()) + quantized_model = self.model_cls.from_pretrained( + "hf-internal-testing/tiny-flux-pipe", + 
subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + ).to(torch_device) + + for param in quantized_model.parameters(): + # freeze the model as only adapter layers will be trained + param.requires_grad = False + if param.ndim == 1: + param.data = param.data.to(torch.float32) + + for _, module in quantized_model.named_modules(): + if isinstance(module, Attention): + module.to_q = LoRALayer(module.to_q, rank=4) + module.to_k = LoRALayer(module.to_k, rank=4) + module.to_v = LoRALayer(module.to_v, rank=4) + + with torch.amp.autocast(str(torch_device), dtype=torch.bfloat16): + inputs = self.get_dummy_training_inputs(torch_device) + output = quantized_model(**inputs)[0] + output.norm().backward() + + for module in quantized_model.modules(): + if isinstance(module, LoRALayer): + self.assertTrue(module.adapter[1].weight.grad is not None) + + +class FluxTransformerFloat8WeightsTest(FluxTransformerQuantoMixin, unittest.TestCase): + expected_memory_reduction = 0.6 + + def get_dummy_init_kwargs(self): + return {"weights_dtype": "float8"} + + +class FluxTransformerInt8WeightsTest(FluxTransformerQuantoMixin, unittest.TestCase): + expected_memory_reduction = 0.6 + _test_torch_compile = True + + def get_dummy_init_kwargs(self): + return {"weights_dtype": "int8"} + + +@require_torch_cuda_compatibility(8.0) +class FluxTransformerInt4WeightsTest(FluxTransformerQuantoMixin, unittest.TestCase): + expected_memory_reduction = 0.55 + + def get_dummy_init_kwargs(self): + return {"weights_dtype": "int4"} + + +@require_torch_cuda_compatibility(8.0) +class FluxTransformerInt2WeightsTest(FluxTransformerQuantoMixin, unittest.TestCase): + expected_memory_reduction = 0.65 + + def get_dummy_init_kwargs(self): + return {"weights_dtype": "int2"} diff --git a/pythonProject/diffusers-main/tests/quantization/test_pipeline_level_quantization.py b/pythonProject/diffusers-main/tests/quantization/test_pipeline_level_quantization.py new file mode 100644 index 0000000000000000000000000000000000000000..51cf4057d64eda7a7c77ecbf8da24cb1981ce3f0 --- /dev/null +++ b/pythonProject/diffusers-main/tests/quantization/test_pipeline_level_quantization.py @@ -0,0 +1,301 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Team Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a clone of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json +import tempfile +import unittest + +import torch +from parameterized import parameterized + +from diffusers import BitsAndBytesConfig, DiffusionPipeline, QuantoConfig +from diffusers.quantizers import PipelineQuantizationConfig +from diffusers.utils import logging + +from ..testing_utils import ( + CaptureLogger, + is_transformers_available, + require_accelerate, + require_bitsandbytes_version_greater, + require_quanto, + require_torch, + require_torch_accelerator, + slow, + torch_device, +) + + +if is_transformers_available(): + from transformers import BitsAndBytesConfig as TranBitsAndBytesConfig +else: + TranBitsAndBytesConfig = None + + +@require_bitsandbytes_version_greater("0.43.2") +@require_quanto +@require_accelerate +@require_torch +@require_torch_accelerator +@slow +class PipelineQuantizationTests(unittest.TestCase): + model_name = "hf-internal-testing/tiny-flux-pipe" + prompt = "a beautiful sunset amidst the mountains." + num_inference_steps = 10 + seed = 0 + + def test_quant_config_set_correctly_through_kwargs(self): + components_to_quantize = ["transformer", "text_encoder_2"] + quant_config = PipelineQuantizationConfig( + quant_backend="bitsandbytes_4bit", + quant_kwargs={ + "load_in_4bit": True, + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_compute_dtype": torch.bfloat16, + }, + components_to_quantize=components_to_quantize, + ) + pipe = DiffusionPipeline.from_pretrained( + self.model_name, + quantization_config=quant_config, + torch_dtype=torch.bfloat16, + ).to(torch_device) + for name, component in pipe.components.items(): + if name in components_to_quantize: + self.assertTrue(getattr(component.config, "quantization_config", None) is not None) + quantization_config = component.config.quantization_config + self.assertTrue(quantization_config.load_in_4bit) + self.assertTrue(quantization_config.quant_method == "bitsandbytes") + + _ = pipe(self.prompt, num_inference_steps=self.num_inference_steps) + + def test_quant_config_set_correctly_through_granular(self): + quant_config = PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig(weights_dtype="int8"), + "text_encoder_2": TranBitsAndBytesConfig(load_in_4bit=True, compute_dtype=torch.bfloat16), + } + ) + components_to_quantize = list(quant_config.quant_mapping.keys()) + pipe = DiffusionPipeline.from_pretrained( + self.model_name, + quantization_config=quant_config, + torch_dtype=torch.bfloat16, + ).to(torch_device) + for name, component in pipe.components.items(): + if name in components_to_quantize: + self.assertTrue(getattr(component.config, "quantization_config", None) is not None) + quantization_config = component.config.quantization_config + + if name == "text_encoder_2": + self.assertTrue(quantization_config.load_in_4bit) + self.assertTrue(quantization_config.quant_method == "bitsandbytes") + else: + self.assertTrue(quantization_config.quant_method == "quanto") + + _ = pipe(self.prompt, num_inference_steps=self.num_inference_steps) + + def test_raises_error_for_invalid_config(self): + with self.assertRaises(ValueError) as err_context: + _ = PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig(weights_dtype="int8"), + "text_encoder_2": TranBitsAndBytesConfig(load_in_4bit=True, compute_dtype=torch.bfloat16), + }, + quant_backend="bitsandbytes_4bit", + ) + + self.assertTrue( + str(err_context.exception) + == "Both `quant_backend` and `quant_mapping` cannot be specified at the same time." 
+ ) + + def test_validation_for_kwargs(self): + components_to_quantize = ["transformer", "text_encoder_2"] + with self.assertRaises(ValueError) as err_context: + _ = PipelineQuantizationConfig( + quant_backend="quanto", + quant_kwargs={"weights_dtype": "int8"}, + components_to_quantize=components_to_quantize, + ) + + self.assertTrue( + "The signatures of the __init__ methods of the quantization config classes" in str(err_context.exception) + ) + + def test_raises_error_for_wrong_config_class(self): + quant_config = { + "transformer": QuantoConfig(weights_dtype="int8"), + "text_encoder_2": TranBitsAndBytesConfig(load_in_4bit=True, compute_dtype=torch.bfloat16), + } + with self.assertRaises(ValueError) as err_context: + _ = DiffusionPipeline.from_pretrained( + self.model_name, + quantization_config=quant_config, + torch_dtype=torch.bfloat16, + ) + self.assertTrue( + str(err_context.exception) == "`quantization_config` must be an instance of `PipelineQuantizationConfig`." + ) + + def test_validation_for_mapping(self): + with self.assertRaises(ValueError) as err_context: + _ = PipelineQuantizationConfig( + quant_mapping={ + "transformer": DiffusionPipeline(), + "text_encoder_2": TranBitsAndBytesConfig(load_in_4bit=True, compute_dtype=torch.bfloat16), + } + ) + + self.assertTrue("Provided config for module_name=transformer could not be found" in str(err_context.exception)) + + def test_saving_loading(self): + quant_config = PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig(weights_dtype="int8"), + "text_encoder_2": TranBitsAndBytesConfig(load_in_4bit=True, compute_dtype=torch.bfloat16), + } + ) + components_to_quantize = list(quant_config.quant_mapping.keys()) + pipe = DiffusionPipeline.from_pretrained( + self.model_name, + quantization_config=quant_config, + torch_dtype=torch.bfloat16, + ).to(torch_device) + + pipe_inputs = {"prompt": self.prompt, "num_inference_steps": self.num_inference_steps, "output_type": "latent"} + output_1 = pipe(**pipe_inputs, generator=torch.manual_seed(self.seed)).images + + with tempfile.TemporaryDirectory() as tmpdir: + pipe.save_pretrained(tmpdir) + loaded_pipe = DiffusionPipeline.from_pretrained(tmpdir, torch_dtype=torch.bfloat16).to(torch_device) + for name, component in loaded_pipe.components.items(): + if name in components_to_quantize: + self.assertTrue(getattr(component.config, "quantization_config", None) is not None) + quantization_config = component.config.quantization_config + + if name == "text_encoder_2": + self.assertTrue(quantization_config.load_in_4bit) + self.assertTrue(quantization_config.quant_method == "bitsandbytes") + else: + self.assertTrue(quantization_config.quant_method == "quanto") + + output_2 = loaded_pipe(**pipe_inputs, generator=torch.manual_seed(self.seed)).images + + self.assertTrue(torch.allclose(output_1, output_2)) + + @parameterized.expand(["quant_kwargs", "quant_mapping"]) + def test_warn_invalid_component(self, method): + invalid_component = "foo" + if method == "quant_kwargs": + components_to_quantize = ["transformer", invalid_component] + quant_config = PipelineQuantizationConfig( + quant_backend="bitsandbytes_8bit", + quant_kwargs={"load_in_8bit": True}, + components_to_quantize=components_to_quantize, + ) + else: + quant_config = PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig("int8"), + invalid_component: TranBitsAndBytesConfig(load_in_8bit=True), + } + ) + + logger = logging.get_logger("diffusers.pipelines.pipeline_loading_utils") + logger.setLevel(logging.WARNING) + 
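+        # CaptureLogger collects everything emitted on this logger, so the test can assert
+        # that loading warned about the unknown component name.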
with CaptureLogger(logger) as cap_logger:
+            _ = DiffusionPipeline.from_pretrained(
+                self.model_name,
+                quantization_config=quant_config,
+                torch_dtype=torch.bfloat16,
+            )
+        self.assertTrue(invalid_component in cap_logger.out)
+
+    @parameterized.expand(["quant_kwargs", "quant_mapping"])
+    def test_no_quantization_for_all_invalid_components(self, method):
+        invalid_component = "foo"
+        if method == "quant_kwargs":
+            components_to_quantize = [invalid_component]
+            quant_config = PipelineQuantizationConfig(
+                quant_backend="bitsandbytes_8bit",
+                quant_kwargs={"load_in_8bit": True},
+                components_to_quantize=components_to_quantize,
+            )
+        else:
+            quant_config = PipelineQuantizationConfig(
+                quant_mapping={invalid_component: TranBitsAndBytesConfig(load_in_8bit=True)}
+            )
+
+        pipe = DiffusionPipeline.from_pretrained(
+            self.model_name,
+            quantization_config=quant_config,
+            torch_dtype=torch.bfloat16,
+        )
+        for name, component in pipe.components.items():
+            if isinstance(component, torch.nn.Module):
+                self.assertTrue(not hasattr(component.config, "quantization_config"))
+
+    @parameterized.expand(["quant_kwargs", "quant_mapping"])
+    def test_quant_config_repr(self, method):
+        component_name = "transformer"
+        if method == "quant_kwargs":
+            components_to_quantize = [component_name]
+            quant_config = PipelineQuantizationConfig(
+                quant_backend="bitsandbytes_8bit",
+                quant_kwargs={"load_in_8bit": True},
+                components_to_quantize=components_to_quantize,
+            )
+        else:
+            quant_config = PipelineQuantizationConfig(
+                quant_mapping={component_name: BitsAndBytesConfig(load_in_8bit=True)}
+            )
+
+        pipe = DiffusionPipeline.from_pretrained(
+            self.model_name,
+            quantization_config=quant_config,
+            torch_dtype=torch.bfloat16,
+        )
+        self.assertTrue(getattr(pipe, "quantization_config", None) is not None)
+        retrieved_config = pipe.quantization_config
+        expected_config = """
+transformer BitsAndBytesConfig {
+  "_load_in_4bit": false,
+  "_load_in_8bit": true,
+  "bnb_4bit_compute_dtype": "float32",
+  "bnb_4bit_quant_storage": "uint8",
+  "bnb_4bit_quant_type": "fp4",
+  "bnb_4bit_use_double_quant": false,
+  "llm_int8_enable_fp32_cpu_offload": false,
+  "llm_int8_has_fp16_weight": false,
+  "llm_int8_skip_modules": null,
+  "llm_int8_threshold": 6.0,
+  "load_in_4bit": false,
+  "load_in_8bit": true,
+  "quant_method": "bitsandbytes"
+}
+
+"""
+        expected_data = self._parse_config_string(expected_config)
+        actual_data = self._parse_config_string(str(retrieved_config))
+        self.assertTrue(actual_data == expected_data)
+
+    def _parse_config_string(self, config_string: str) -> dict:
+        first_brace = config_string.find("{")
+        if first_brace == -1:
+            raise ValueError("Could not find opening brace '{' in the string.")
+
+        json_part = config_string[first_brace:]
+        data = json.loads(json_part)
+
+        return data
diff --git a/pythonProject/diffusers-main/tests/quantization/test_torch_compile_utils.py b/pythonProject/diffusers-main/tests/quantization/test_torch_compile_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..29758cbdd7352fec06290df2703eff78ee9bf92a
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/quantization/test_torch_compile_utils.py
@@ -0,0 +1,106 @@
+# coding=utf-8
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import gc
+import inspect
+
+import torch
+
+from diffusers import DiffusionPipeline
+
+from ..testing_utils import backend_empty_cache, require_torch_accelerator, slow, torch_device
+
+
+@require_torch_accelerator
+@slow
+class QuantCompileTests:
+    @property
+    def quantization_config(self):
+        raise NotImplementedError(
+            "This property should be implemented in the subclass to return the appropriate quantization config."
+        )
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+        torch.compiler.reset()
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+        torch.compiler.reset()
+
+    def _init_pipeline(self, quantization_config, torch_dtype):
+        pipe = DiffusionPipeline.from_pretrained(
+            "stabilityai/stable-diffusion-3-medium-diffusers",
+            quantization_config=quantization_config,
+            torch_dtype=torch_dtype,
+        )
+        return pipe
+
+    def _test_torch_compile(self, torch_dtype=torch.bfloat16):
+        pipe = self._init_pipeline(self.quantization_config, torch_dtype).to(torch_device)
+        # `fullgraph=True` ensures no graph breaks
+        pipe.transformer.compile(fullgraph=True)
+
+        # small resolutions to ensure speedy execution.
+        with torch._dynamo.config.patch(error_on_recompile=True):
+            pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
+
+    def _test_torch_compile_with_cpu_offload(self, torch_dtype=torch.bfloat16):
+        pipe = self._init_pipeline(self.quantization_config, torch_dtype)
+        pipe.enable_model_cpu_offload()
+        # regional compilation is better for offloading.
+        # see: https://pytorch.org/blog/torch-compile-and-diffusers-a-hands-on-guide-to-peak-performance/
+        if getattr(pipe.transformer, "_repeated_blocks", None):
+            pipe.transformer.compile_repeated_blocks(fullgraph=True)
+        else:
+            pipe.transformer.compile()
+
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
+
+    def _test_torch_compile_with_group_offload_leaf(self, torch_dtype=torch.bfloat16, *, use_stream: bool = False):
+        torch._dynamo.config.cache_size_limit = 1000
+
+        pipe = self._init_pipeline(self.quantization_config, torch_dtype)
+        group_offload_kwargs = {
+            "onload_device": torch.device(torch_device),
+            "offload_device": torch.device("cpu"),
+            "offload_type": "leaf_level",
+            "use_stream": use_stream,
+        }
+        pipe.transformer.enable_group_offload(**group_offload_kwargs)
+        pipe.transformer.compile()
+        for name, component in pipe.components.items():
+            if name != "transformer" and isinstance(component, torch.nn.Module):
+                if torch.device(component.device).type == "cpu":
+                    component.to(torch_device)
+
+        # small resolutions to ensure speedy execution.
+ pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256) + + def test_torch_compile(self): + self._test_torch_compile() + + def test_torch_compile_with_cpu_offload(self): + self._test_torch_compile_with_cpu_offload() + + def test_torch_compile_with_group_offload_leaf(self, use_stream=False): + for cls in inspect.getmro(self.__class__): + if "test_torch_compile_with_group_offload_leaf" in cls.__dict__ and cls is not QuantCompileTests: + return + self._test_torch_compile_with_group_offload_leaf(use_stream=use_stream) diff --git a/pythonProject/diffusers-main/tests/quantization/torchao/README.md b/pythonProject/diffusers-main/tests/quantization/torchao/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fadc529e12fc0876ca079f58aa90518823b88474 --- /dev/null +++ b/pythonProject/diffusers-main/tests/quantization/torchao/README.md @@ -0,0 +1,53 @@ +The tests here are adapted from [`transformers` tests](https://github.com/huggingface/transformers/blob/3a8eb74668e9c2cc563b2f5c62fac174797063e0/tests/quantization/torchao_integration/). + +The benchmarks were run on a single H100. Below is `nvidia-smi`: + +```bash ++---------------------------------------------------------------------------------------+ +| NVIDIA-SMI 535.104.12 Driver Version: 535.104.12 CUDA Version: 12.2 | +|-----------------------------------------+----------------------+----------------------+ +| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. | +|=========================================+======================+======================| +| 0 NVIDIA H100 80GB HBM3 On | 00000000:53:00.0 Off | 0 | +| N/A 34C P0 69W / 700W | 2MiB / 81559MiB | 0% Default | +| | | Disabled | ++-----------------------------------------+----------------------+----------------------+ + ++---------------------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=======================================================================================| +| No running processes found | ++---------------------------------------------------------------------------------------+ +``` + +The benchmark results for Flux and CogVideoX can be found in [this](https://github.com/huggingface/diffusers/pull/10009) PR. + +The tests, and the expected slices, were obtained from the `aws-g6e-xlarge-plus` GPU test runners. 
To run the slow tests, use the following command or an equivalent: + +```bash +HF_HUB_ENABLE_HF_TRANSFER=1 RUN_SLOW=1 pytest -s tests/quantization/torchao/test_torchao.py::SlowTorchAoTests +``` + +`diffusers-cli`: + +```bash +- 🤗 Diffusers version: 0.32.0.dev0 +- Platform: Linux-5.15.0-1049-aws-x86_64-with-glibc2.31 +- Running on Google Colab?: No +- Python version: 3.10.14 +- PyTorch version (GPU?): 2.6.0.dev20241112+cu121 (False) +- Flax version (CPU?/GPU?/TPU?): not installed (NA) +- Jax version: not installed +- JaxLib version: not installed +- Huggingface_hub version: 0.26.2 +- Transformers version: 4.46.3 +- Accelerate version: 1.1.1 +- PEFT version: not installed +- Bitsandbytes version: not installed +- Safetensors version: 0.4.5 +- xFormers version: not installed +``` diff --git a/pythonProject/diffusers-main/tests/quantization/torchao/__init__.py b/pythonProject/diffusers-main/tests/quantization/torchao/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/quantization/torchao/test_torchao.py b/pythonProject/diffusers-main/tests/quantization/torchao/test_torchao.py new file mode 100644 index 0000000000000000000000000000000000000000..920c3a55f56cd64b31301462a192b2580b96dcad --- /dev/null +++ b/pythonProject/diffusers-main/tests/quantization/torchao/test_torchao.py @@ -0,0 +1,890 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
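+
+# A rough sketch of the pattern exercised throughout this file (quant_type strings such as
+# "int8_weight_only" come from torchao; the tiny checkpoint id is the one used below):
+#
+#     from diffusers import FluxTransformer2DModel, TorchAoConfig
+#
+#     quantization_config = TorchAoConfig("int8_weight_only")
+#     transformer = FluxTransformer2DModel.from_pretrained(
+#         "hf-internal-testing/tiny-flux-pipe",
+#         subfolder="transformer",
+#         quantization_config=quantization_config,
+#         torch_dtype=torch.bfloat16,
+#     )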
+ +import gc +import tempfile +import unittest +from typing import List + +import numpy as np +from parameterized import parameterized +from transformers import AutoTokenizer, CLIPTextModel, CLIPTokenizer, T5EncoderModel + +from diffusers import ( + AutoencoderKL, + FlowMatchEulerDiscreteScheduler, + FluxPipeline, + FluxTransformer2DModel, + TorchAoConfig, +) +from diffusers.models.attention_processor import Attention +from diffusers.quantizers import PipelineQuantizationConfig + +from ...testing_utils import ( + backend_empty_cache, + backend_synchronize, + enable_full_determinism, + is_torch_available, + is_torchao_available, + nightly, + numpy_cosine_similarity_distance, + require_torch, + require_torch_accelerator, + require_torchao_version_greater_or_equal, + slow, + torch_device, +) +from ..test_torch_compile_utils import QuantCompileTests + + +enable_full_determinism() + + +if is_torch_available(): + import torch + import torch.nn as nn + + from ..utils import LoRALayer, get_memory_consumption_stat + + +if is_torchao_available(): + from torchao.dtypes import AffineQuantizedTensor + from torchao.quantization.linear_activation_quantized_tensor import LinearActivationQuantizedTensor + from torchao.quantization.quant_primitives import MappingType + from torchao.utils import get_model_size_in_bytes + + +@require_torch +@require_torch_accelerator +@require_torchao_version_greater_or_equal("0.7.0") +class TorchAoConfigTest(unittest.TestCase): + def test_to_dict(self): + """ + Makes sure the config format is properly set + """ + quantization_config = TorchAoConfig("int4_weight_only") + torchao_orig_config = quantization_config.to_dict() + + for key in torchao_orig_config: + self.assertEqual(getattr(quantization_config, key), torchao_orig_config[key]) + + def test_post_init_check(self): + """ + Test kwargs validations in TorchAoConfig + """ + _ = TorchAoConfig("int4_weight_only") + with self.assertRaisesRegex(ValueError, "is not supported"): + _ = TorchAoConfig("uint8") + + with self.assertRaisesRegex(ValueError, "does not support the following keyword arguments"): + _ = TorchAoConfig("int4_weight_only", group_size1=32) + + def test_repr(self): + """ + Check that there is no error in the repr + """ + quantization_config = TorchAoConfig("int4_weight_only", modules_to_not_convert=["conv"], group_size=8) + expected_repr = """TorchAoConfig { + "modules_to_not_convert": [ + "conv" + ], + "quant_method": "torchao", + "quant_type": "int4_weight_only", + "quant_type_kwargs": { + "group_size": 8 + } + }""".replace(" ", "").replace("\n", "") + quantization_repr = repr(quantization_config).replace(" ", "").replace("\n", "") + self.assertEqual(quantization_repr, expected_repr) + + quantization_config = TorchAoConfig("int4dq", group_size=64, act_mapping_type=MappingType.SYMMETRIC) + expected_repr = """TorchAoConfig { + "modules_to_not_convert": null, + "quant_method": "torchao", + "quant_type": "int4dq", + "quant_type_kwargs": { + "act_mapping_type": "SYMMETRIC", + "group_size": 64 + } + }""".replace(" ", "").replace("\n", "") + quantization_repr = repr(quantization_config).replace(" ", "").replace("\n", "") + self.assertEqual(quantization_repr, expected_repr) + + +# Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners +@require_torch +@require_torch_accelerator +@require_torchao_version_greater_or_equal("0.7.0") +class TorchAoTest(unittest.TestCase): + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def get_dummy_components( + self, quantization_config: 
TorchAoConfig, model_id: str = "hf-internal-testing/tiny-flux-pipe" + ): + transformer = FluxTransformer2DModel.from_pretrained( + model_id, + subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + ) + text_encoder = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder", torch_dtype=torch.bfloat16) + text_encoder_2 = T5EncoderModel.from_pretrained( + model_id, subfolder="text_encoder_2", torch_dtype=torch.bfloat16 + ) + tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer") + tokenizer_2 = AutoTokenizer.from_pretrained(model_id, subfolder="tokenizer_2") + vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.bfloat16) + scheduler = FlowMatchEulerDiscreteScheduler() + + return { + "scheduler": scheduler, + "text_encoder": text_encoder, + "text_encoder_2": text_encoder_2, + "tokenizer": tokenizer, + "tokenizer_2": tokenizer_2, + "transformer": transformer, + "vae": vae, + } + + def get_dummy_inputs(self, device: torch.device, seed: int = 0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator().manual_seed(seed) + + inputs = { + "prompt": "an astronaut riding a horse in space", + "height": 32, + "width": 32, + "num_inference_steps": 2, + "output_type": "np", + "generator": generator, + } + + return inputs + + def get_dummy_tensor_inputs(self, device=None, seed: int = 0): + batch_size = 1 + num_latent_channels = 4 + num_image_channels = 3 + height = width = 4 + sequence_length = 48 + embedding_dim = 32 + + torch.manual_seed(seed) + hidden_states = torch.randn((batch_size, height * width, num_latent_channels)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + encoder_hidden_states = torch.randn((batch_size, sequence_length, embedding_dim)).to( + device, dtype=torch.bfloat16 + ) + + torch.manual_seed(seed) + pooled_prompt_embeds = torch.randn((batch_size, embedding_dim)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + text_ids = torch.randn((sequence_length, num_image_channels)).to(device, dtype=torch.bfloat16) + + torch.manual_seed(seed) + image_ids = torch.randn((height * width, num_image_channels)).to(device, dtype=torch.bfloat16) + + timestep = torch.tensor([1.0]).to(device, dtype=torch.bfloat16).expand(batch_size) + + return { + "hidden_states": hidden_states, + "encoder_hidden_states": encoder_hidden_states, + "pooled_projections": pooled_prompt_embeds, + "txt_ids": text_ids, + "img_ids": image_ids, + "timestep": timestep, + } + + def _test_quant_type(self, quantization_config: TorchAoConfig, expected_slice: List[float], model_id: str): + components = self.get_dummy_components(quantization_config, model_id) + pipe = FluxPipeline(**components) + pipe.to(device=torch_device) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0] + output_slice = output[-1, -1, -3:, -3:].flatten() + + self.assertTrue(np.allclose(output_slice, expected_slice, atol=1e-3, rtol=1e-3)) + + def test_quantization(self): + for model_id in ["hf-internal-testing/tiny-flux-pipe", "hf-internal-testing/tiny-flux-sharded"]: + # fmt: off + QUANTIZATION_TYPES_TO_TEST = [ + ("int4wo", np.array([0.4648, 0.5234, 0.5547, 0.4219, 0.4414, 0.6445, 0.4336, 0.4531, 0.5625])), + ("int4dq", np.array([0.4688, 0.5195, 0.5547, 0.418, 0.4414, 0.6406, 0.4336, 0.4531, 0.5625])), + ("int8wo", np.array([0.4648, 0.5195, 0.5547, 0.4199, 0.4414, 0.6445, 0.4316, 0.4531, 0.5625])), + ("int8dq", np.array([0.4648, 0.5195, 0.5547, 0.4199, 
0.4414, 0.6445, 0.4316, 0.4531, 0.5625])), + ("uint4wo", np.array([0.4609, 0.5234, 0.5508, 0.4199, 0.4336, 0.6406, 0.4316, 0.4531, 0.5625])), + ("uint7wo", np.array([0.4648, 0.5195, 0.5547, 0.4219, 0.4414, 0.6445, 0.4316, 0.4531, 0.5625])), + ] + + if TorchAoConfig._is_xpu_or_cuda_capability_atleast_8_9(): + QUANTIZATION_TYPES_TO_TEST.extend([ + ("float8wo_e5m2", np.array([0.4590, 0.5273, 0.5547, 0.4219, 0.4375, 0.6406, 0.4316, 0.4512, 0.5625])), + ("float8wo_e4m3", np.array([0.4648, 0.5234, 0.5547, 0.4219, 0.4414, 0.6406, 0.4316, 0.4531, 0.5625])), + # ===== + # The following lead to an internal torch error: + # RuntimeError: mat2 shape (32x4 must be divisible by 16 + # Skip these for now; TODO(aryan): investigate later + # ("float8dq_e4m3", np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])), + # ("float8dq_e4m3_tensor", np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])), + # ===== + # Cutlass fails to initialize for below + # ("float8dq_e4m3_row", np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])), + # ===== + ("fp4", np.array([0.4668, 0.5195, 0.5547, 0.4199, 0.4434, 0.6445, 0.4316, 0.4531, 0.5625])), + ("fp6", np.array([0.4668, 0.5195, 0.5547, 0.4199, 0.4434, 0.6445, 0.4316, 0.4531, 0.5625])), + ]) + # fmt: on + + for quantization_name, expected_slice in QUANTIZATION_TYPES_TO_TEST: + quant_kwargs = {} + if quantization_name in ["uint4wo", "uint7wo"]: + # The dummy flux model that we use has smaller dimensions. This imposes some restrictions on group_size here + quant_kwargs.update({"group_size": 16}) + quantization_config = TorchAoConfig( + quant_type=quantization_name, modules_to_not_convert=["x_embedder"], **quant_kwargs + ) + self._test_quant_type(quantization_config, expected_slice, model_id) + + def test_int4wo_quant_bfloat16_conversion(self): + """ + Tests whether the dtype of model will be modified to bfloat16 for int4 weight-only quantization. + """ + quantization_config = TorchAoConfig("int4_weight_only", group_size=64) + quantized_model = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/tiny-flux-pipe", + subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + device_map=f"{torch_device}:0", + ) + + weight = quantized_model.transformer_blocks[0].ff.net[2].weight + self.assertTrue(isinstance(weight, AffineQuantizedTensor)) + self.assertEqual(weight.quant_min, 0) + self.assertEqual(weight.quant_max, 15) + + def test_device_map(self): + """ + Test if the quantized model int4 weight-only is working properly with "auto" and custom device maps. + The custom device map performs cpu/disk offloading as well. Also verifies that the device map is + correctly set (in the `hf_device_map` attribute of the model). 
+ """ + custom_device_map_dict = { + "time_text_embed": torch_device, + "context_embedder": torch_device, + "x_embedder": torch_device, + "transformer_blocks.0": "cpu", + "single_transformer_blocks.0": "disk", + "norm_out": torch_device, + "proj_out": "cpu", + } + device_maps = ["auto", custom_device_map_dict] + + inputs = self.get_dummy_tensor_inputs(torch_device) + # requires with different expected slices since models are different due to offload (we don't quantize modules offloaded to cpu/disk) + expected_slice_auto = np.array( + [ + 0.34179688, + -0.03613281, + 0.01428223, + -0.22949219, + -0.49609375, + 0.4375, + -0.1640625, + -0.66015625, + 0.43164062, + ] + ) + expected_slice_offload = np.array( + [0.34375, -0.03515625, 0.0123291, -0.22753906, -0.49414062, 0.4375, -0.16308594, -0.66015625, 0.43554688] + ) + for device_map in device_maps: + if device_map == "auto": + expected_slice = expected_slice_auto + else: + expected_slice = expected_slice_offload + with tempfile.TemporaryDirectory() as offload_folder: + quantization_config = TorchAoConfig("int4_weight_only", group_size=64) + quantized_model = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/tiny-flux-pipe", + subfolder="transformer", + quantization_config=quantization_config, + device_map=device_map, + torch_dtype=torch.bfloat16, + offload_folder=offload_folder, + ) + + weight = quantized_model.transformer_blocks[0].ff.net[2].weight + + # Note that when performing cpu/disk offload, the offloaded weights are not quantized, only the weights on the gpu. + # This is not the case when the model are already quantized + if "transformer_blocks.0" in device_map: + self.assertTrue(isinstance(weight, nn.Parameter)) + else: + self.assertTrue(isinstance(weight, AffineQuantizedTensor)) + + output = quantized_model(**inputs)[0] + output_slice = output.flatten()[-9:].detach().float().cpu().numpy() + self.assertTrue(numpy_cosine_similarity_distance(output_slice, expected_slice) < 2e-3) + + with tempfile.TemporaryDirectory() as offload_folder: + quantization_config = TorchAoConfig("int4_weight_only", group_size=64) + quantized_model = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/tiny-flux-sharded", + subfolder="transformer", + quantization_config=quantization_config, + device_map=device_map, + torch_dtype=torch.bfloat16, + offload_folder=offload_folder, + ) + + weight = quantized_model.transformer_blocks[0].ff.net[2].weight + if "transformer_blocks.0" in device_map: + self.assertTrue(isinstance(weight, nn.Parameter)) + else: + self.assertTrue(isinstance(weight, AffineQuantizedTensor)) + + output = quantized_model(**inputs)[0] + output_slice = output.flatten()[-9:].detach().float().cpu().numpy() + self.assertTrue(numpy_cosine_similarity_distance(output_slice, expected_slice) < 2e-3) + + def test_modules_to_not_convert(self): + quantization_config = TorchAoConfig("int8_weight_only", modules_to_not_convert=["transformer_blocks.0"]) + quantized_model_with_not_convert = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/tiny-flux-pipe", + subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + ) + + unquantized_layer = quantized_model_with_not_convert.transformer_blocks[0].ff.net[2] + self.assertTrue(isinstance(unquantized_layer, torch.nn.Linear)) + self.assertFalse(isinstance(unquantized_layer.weight, AffineQuantizedTensor)) + self.assertEqual(unquantized_layer.weight.dtype, torch.bfloat16) + + quantized_layer = quantized_model_with_not_convert.proj_out + 
+        self.assertTrue(isinstance(quantized_layer.weight, AffineQuantizedTensor))
+
+        quantization_config = TorchAoConfig("int8_weight_only")
+        quantized_model = FluxTransformer2DModel.from_pretrained(
+            "hf-internal-testing/tiny-flux-pipe",
+            subfolder="transformer",
+            quantization_config=quantization_config,
+            torch_dtype=torch.bfloat16,
+        )
+
+        size_quantized_with_not_convert = get_model_size_in_bytes(quantized_model_with_not_convert)
+        size_quantized = get_model_size_in_bytes(quantized_model)
+
+        self.assertTrue(size_quantized < size_quantized_with_not_convert)
+
+    def test_training(self):
+        quantization_config = TorchAoConfig("int8_weight_only")
+        quantized_model = FluxTransformer2DModel.from_pretrained(
+            "hf-internal-testing/tiny-flux-pipe",
+            subfolder="transformer",
+            quantization_config=quantization_config,
+            torch_dtype=torch.bfloat16,
+        ).to(torch_device)
+
+        for param in quantized_model.parameters():
+            # freeze the model as only adapter layers will be trained
+            param.requires_grad = False
+            if param.ndim == 1:
+                param.data = param.data.to(torch.float32)
+
+        for _, module in quantized_model.named_modules():
+            if isinstance(module, Attention):
+                module.to_q = LoRALayer(module.to_q, rank=4)
+                module.to_k = LoRALayer(module.to_k, rank=4)
+                module.to_v = LoRALayer(module.to_v, rank=4)
+
+        with torch.amp.autocast(str(torch_device), dtype=torch.bfloat16):
+            inputs = self.get_dummy_tensor_inputs(torch_device)
+            output = quantized_model(**inputs)[0]
+            output.norm().backward()
+
+        for module in quantized_model.modules():
+            if isinstance(module, LoRALayer):
+                self.assertTrue(module.adapter[1].weight.grad is not None)
+                self.assertTrue(module.adapter[1].weight.grad.norm().item() > 0)
+
+    @nightly
+    def test_torch_compile(self):
+        r"""Test that verifies if torch.compile works with torchao quantization."""
+        for model_id in ["hf-internal-testing/tiny-flux-pipe", "hf-internal-testing/tiny-flux-sharded"]:
+            quantization_config = TorchAoConfig("int8_weight_only")
+            components = self.get_dummy_components(quantization_config, model_id=model_id)
+            pipe = FluxPipeline(**components)
+            pipe.to(device=torch_device)
+
+            inputs = self.get_dummy_inputs(torch_device)
+            normal_output = pipe(**inputs)[0].flatten()[-32:]
+
+            pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True, dynamic=False)
+            inputs = self.get_dummy_inputs(torch_device)
+            compile_output = pipe(**inputs)[0].flatten()[-32:]
+
+            # Note: Seems to require higher tolerance
+            self.assertTrue(np.allclose(normal_output, compile_output, atol=1e-2, rtol=1e-3))
+
+    def test_memory_footprint(self):
+        r"""
+        A simple test to check that the model conversion was done correctly, by checking the
+        memory footprint of the converted model and the class types of its linear layers
+        """
+        for model_id in ["hf-internal-testing/tiny-flux-pipe", "hf-internal-testing/tiny-flux-sharded"]:
+            transformer_int4wo = self.get_dummy_components(TorchAoConfig("int4wo"), model_id=model_id)["transformer"]
+            transformer_int4wo_gs32 = self.get_dummy_components(
+                TorchAoConfig("int4wo", group_size=32), model_id=model_id
+            )["transformer"]
+            transformer_int8wo = self.get_dummy_components(TorchAoConfig("int8wo"), model_id=model_id)["transformer"]
+            transformer_bf16 = self.get_dummy_components(None, model_id=model_id)["transformer"]
+
+            # Will not quantize all the layers by default, due to the model weight shapes not being divisible by group_size=64
+            for block in transformer_int4wo.transformer_blocks:
+                self.assertTrue(isinstance(block.ff.net[2].weight, AffineQuantizedTensor))
+                self.assertTrue(isinstance(block.ff_context.net[2].weight, AffineQuantizedTensor))
+
+            # Will quantize all the linear layers except x_embedder
+            for name, module in transformer_int4wo_gs32.named_modules():
+                if isinstance(module, nn.Linear) and name not in ["x_embedder"]:
+                    self.assertTrue(isinstance(module.weight, AffineQuantizedTensor))
+
+            # Will quantize all the linear layers
+            for module in transformer_int8wo.modules():
+                if isinstance(module, nn.Linear):
+                    self.assertTrue(isinstance(module.weight, AffineQuantizedTensor))
+
+            total_int4wo = get_model_size_in_bytes(transformer_int4wo)
+            total_int4wo_gs32 = get_model_size_in_bytes(transformer_int4wo_gs32)
+            total_int8wo = get_model_size_in_bytes(transformer_int8wo)
+            total_bf16 = get_model_size_in_bytes(transformer_bf16)
+
+            # TODO: refactor to align with other quantization tests
+            # The latter has a smaller group size, so more groups -> more scales and zero points
+            self.assertTrue(total_int4wo < total_int4wo_gs32)
+            # int8 quantizes more layers compared to int4 with the default group size
+            self.assertTrue(total_int8wo < total_int4wo)
+            # int4wo does not quantize many layers because of the default group size, but for the layers it does
+            # quantize, there is additional overhead from scales and zero points
+            self.assertTrue(total_bf16 < total_int4wo)
+
+    def test_model_memory_usage(self):
+        model_id = "hf-internal-testing/tiny-flux-pipe"
+        expected_memory_saving_ratio = 2.0
+
+        inputs = self.get_dummy_tensor_inputs(device=torch_device)
+
+        transformer_bf16 = self.get_dummy_components(None, model_id=model_id)["transformer"]
+        transformer_bf16.to(torch_device)
+        unquantized_model_memory = get_memory_consumption_stat(transformer_bf16, inputs)
+        del transformer_bf16
+
+        transformer_int8wo = self.get_dummy_components(TorchAoConfig("int8wo"), model_id=model_id)["transformer"]
+        transformer_int8wo.to(torch_device)
+        quantized_model_memory = get_memory_consumption_stat(transformer_int8wo, inputs)
+        assert unquantized_model_memory / quantized_model_memory >= expected_memory_saving_ratio
+
+    def test_wrong_config(self):
+        with self.assertRaises(ValueError):
+            self.get_dummy_components(TorchAoConfig("int42"))
+
+    def test_sequential_cpu_offload(self):
+        r"""
+        A test that checks if inference runs as expected when sequential cpu offloading is enabled.
+ """ + quantization_config = TorchAoConfig("int8wo") + components = self.get_dummy_components(quantization_config) + pipe = FluxPipeline(**components) + pipe.enable_sequential_cpu_offload() + + inputs = self.get_dummy_inputs(torch_device) + _ = pipe(**inputs) + + +# Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners +@require_torch +@require_torch_accelerator +@require_torchao_version_greater_or_equal("0.7.0") +class TorchAoSerializationTest(unittest.TestCase): + model_name = "hf-internal-testing/tiny-flux-pipe" + + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def get_dummy_model(self, quant_method, quant_method_kwargs, device=None): + quantization_config = TorchAoConfig(quant_method, **quant_method_kwargs) + quantized_model = FluxTransformer2DModel.from_pretrained( + self.model_name, + subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + ) + return quantized_model.to(device) + + def get_dummy_tensor_inputs(self, device=None, seed: int = 0): + batch_size = 1 + num_latent_channels = 4 + num_image_channels = 3 + height = width = 4 + sequence_length = 48 + embedding_dim = 32 + + torch.manual_seed(seed) + hidden_states = torch.randn((batch_size, height * width, num_latent_channels)).to(device, dtype=torch.bfloat16) + encoder_hidden_states = torch.randn((batch_size, sequence_length, embedding_dim)).to( + device, dtype=torch.bfloat16 + ) + pooled_prompt_embeds = torch.randn((batch_size, embedding_dim)).to(device, dtype=torch.bfloat16) + text_ids = torch.randn((sequence_length, num_image_channels)).to(device, dtype=torch.bfloat16) + image_ids = torch.randn((height * width, num_image_channels)).to(device, dtype=torch.bfloat16) + timestep = torch.tensor([1.0]).to(device, dtype=torch.bfloat16).expand(batch_size) + + return { + "hidden_states": hidden_states, + "encoder_hidden_states": encoder_hidden_states, + "pooled_projections": pooled_prompt_embeds, + "txt_ids": text_ids, + "img_ids": image_ids, + "timestep": timestep, + } + + def _test_original_model_expected_slice(self, quant_method, quant_method_kwargs, expected_slice): + quantized_model = self.get_dummy_model(quant_method, quant_method_kwargs, torch_device) + inputs = self.get_dummy_tensor_inputs(torch_device) + output = quantized_model(**inputs)[0] + output_slice = output.flatten()[-9:].detach().float().cpu().numpy() + weight = quantized_model.transformer_blocks[0].ff.net[2].weight + self.assertTrue(isinstance(weight, (AffineQuantizedTensor, LinearActivationQuantizedTensor))) + self.assertTrue(numpy_cosine_similarity_distance(output_slice, expected_slice) < 1e-3) + + def _check_serialization_expected_slice(self, quant_method, quant_method_kwargs, expected_slice, device): + quantized_model = self.get_dummy_model(quant_method, quant_method_kwargs, device) + + with tempfile.TemporaryDirectory() as tmp_dir: + quantized_model.save_pretrained(tmp_dir, safe_serialization=False) + loaded_quantized_model = FluxTransformer2DModel.from_pretrained( + tmp_dir, torch_dtype=torch.bfloat16, use_safetensors=False + ).to(device=torch_device) + + inputs = self.get_dummy_tensor_inputs(torch_device) + output = loaded_quantized_model(**inputs)[0] + + output_slice = output.flatten()[-9:].detach().float().cpu().numpy() + self.assertTrue( + isinstance( + loaded_quantized_model.proj_out.weight, (AffineQuantizedTensor, LinearActivationQuantizedTensor) + ) + ) + self.assertTrue(numpy_cosine_similarity_distance(output_slice, expected_slice) < 1e-3) + + def 
test_int_a8w8_accelerator(self): + quant_method, quant_method_kwargs = "int8_dynamic_activation_int8_weight", {} + expected_slice = np.array([0.3633, -0.1357, -0.0188, -0.249, -0.4688, 0.5078, -0.1289, -0.6914, 0.4551]) + device = torch_device + self._test_original_model_expected_slice(quant_method, quant_method_kwargs, expected_slice) + self._check_serialization_expected_slice(quant_method, quant_method_kwargs, expected_slice, device) + + def test_int_a16w8_accelerator(self): + quant_method, quant_method_kwargs = "int8_weight_only", {} + expected_slice = np.array([0.3613, -0.127, -0.0223, -0.2539, -0.459, 0.4961, -0.1357, -0.6992, 0.4551]) + device = torch_device + self._test_original_model_expected_slice(quant_method, quant_method_kwargs, expected_slice) + self._check_serialization_expected_slice(quant_method, quant_method_kwargs, expected_slice, device) + + def test_int_a8w8_cpu(self): + quant_method, quant_method_kwargs = "int8_dynamic_activation_int8_weight", {} + expected_slice = np.array([0.3633, -0.1357, -0.0188, -0.249, -0.4688, 0.5078, -0.1289, -0.6914, 0.4551]) + device = "cpu" + self._test_original_model_expected_slice(quant_method, quant_method_kwargs, expected_slice) + self._check_serialization_expected_slice(quant_method, quant_method_kwargs, expected_slice, device) + + def test_int_a16w8_cpu(self): + quant_method, quant_method_kwargs = "int8_weight_only", {} + expected_slice = np.array([0.3613, -0.127, -0.0223, -0.2539, -0.459, 0.4961, -0.1357, -0.6992, 0.4551]) + device = "cpu" + self._test_original_model_expected_slice(quant_method, quant_method_kwargs, expected_slice) + self._check_serialization_expected_slice(quant_method, quant_method_kwargs, expected_slice, device) + + +@require_torchao_version_greater_or_equal("0.7.0") +class TorchAoCompileTest(QuantCompileTests, unittest.TestCase): + @property + def quantization_config(self): + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": TorchAoConfig(quant_type="int8_weight_only"), + }, + ) + + @unittest.skip( + "Changing the device of AQT tensor with module._apply (called from doing module.to() in accelerate) does not work " + "when compiling." + ) + def test_torch_compile_with_cpu_offload(self): + # RuntimeError: _apply(): Couldn't swap Linear.weight + super().test_torch_compile_with_cpu_offload() + + @parameterized.expand([False, True]) + @unittest.skip( + """ + For `use_stream=False`: + - Changing the device of AQT tensor, with `param.data = param.data.to(device)` as done in group offloading implementation + is unsupported in TorchAO. When compiling, FakeTensor device mismatch causes failure. + For `use_stream=True`: + Using non-default stream requires ability to pin tensors. AQT does not seem to support this yet in TorchAO. + """ + ) + def test_torch_compile_with_group_offload_leaf(self, use_stream): + # For use_stream=False: + # If we run group offloading without compilation, we will see: + # RuntimeError: Attempted to set the storage of a tensor on device "cpu" to a storage on different device "cuda:0". This is no longer allowed; the devices must match. + # When running with compilation, the error ends up being different: + # Dynamo failed to run FX node with fake tensors: call_function (*(FakeTensor(..., device='cuda:0', size=(s0, 256), dtype=torch.bfloat16), AffineQuantizedTensor(tensor_impl=PlainAQTTensorImpl(data=FakeTensor(..., size=(1536, 256), dtype=torch.int8)... , scale=FakeTensor(..., size=(1536,), dtype=torch.bfloat16)... , zero_point=FakeTensor(..., size=(1536,), dtype=torch.int64)... 
, _layout=PlainLayout()), block_size=(1, 256), shape=torch.Size([1536, 256]), device=cpu, dtype=torch.bfloat16, requires_grad=False), Parameter(FakeTensor(..., device='cuda:0', size=(1536,), dtype=torch.bfloat16, + # requires_grad=True))), **{}): got RuntimeError('Unhandled FakeTensor Device Propagation for aten.mm.default, found two different devices cuda:0, cpu') + # Looks like something that will have to be looked into upstream. + # for linear layers, weight.tensor_impl shows cuda... but: + # weight.tensor_impl.{data,scale,zero_point}.device will be cpu + + # For use_stream=True: + # NotImplementedError: AffineQuantizedTensor dispatch: attempting to run unimplemented operator/function: func=, types=(,), arg_types=(,), kwarg_types={} + super()._test_torch_compile_with_group_offload_leaf(use_stream=use_stream) + + +# Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners +@require_torch +@require_torch_accelerator +@require_torchao_version_greater_or_equal("0.7.0") +@slow +@nightly +class SlowTorchAoTests(unittest.TestCase): + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def get_dummy_components(self, quantization_config: TorchAoConfig): + # This is just for convenience, so that we can modify it at one place for custom environments and locally testing + cache_dir = None + model_id = "black-forest-labs/FLUX.1-dev" + transformer = FluxTransformer2DModel.from_pretrained( + model_id, + subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + cache_dir=cache_dir, + ) + text_encoder = CLIPTextModel.from_pretrained( + model_id, subfolder="text_encoder", torch_dtype=torch.bfloat16, cache_dir=cache_dir + ) + text_encoder_2 = T5EncoderModel.from_pretrained( + model_id, subfolder="text_encoder_2", torch_dtype=torch.bfloat16, cache_dir=cache_dir + ) + tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer", cache_dir=cache_dir) + tokenizer_2 = AutoTokenizer.from_pretrained(model_id, subfolder="tokenizer_2", cache_dir=cache_dir) + vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.bfloat16, cache_dir=cache_dir) + scheduler = FlowMatchEulerDiscreteScheduler() + + return { + "scheduler": scheduler, + "text_encoder": text_encoder, + "text_encoder_2": text_encoder_2, + "tokenizer": tokenizer, + "tokenizer_2": tokenizer_2, + "transformer": transformer, + "vae": vae, + } + + def get_dummy_inputs(self, device: torch.device, seed: int = 0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator().manual_seed(seed) + + inputs = { + "prompt": "an astronaut riding a horse in space", + "height": 512, + "width": 512, + "num_inference_steps": 20, + "output_type": "np", + "generator": generator, + } + + return inputs + + def _test_quant_type(self, quantization_config, expected_slice): + components = self.get_dummy_components(quantization_config) + pipe = FluxPipeline(**components) + pipe.enable_model_cpu_offload() + + weight = pipe.transformer.transformer_blocks[0].ff.net[2].weight + self.assertTrue(isinstance(weight, (AffineQuantizedTensor, LinearActivationQuantizedTensor))) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0].flatten() + output_slice = np.concatenate((output[:16], output[-16:])) + self.assertTrue(np.allclose(output_slice, expected_slice, atol=1e-3, rtol=1e-3)) + + def test_quantization(self): + # fmt: off + QUANTIZATION_TYPES_TO_TEST = [ + ("int8wo", np.array([0.0505, 0.0742, 
0.1367, 0.0429, 0.0585, 0.1386, 0.0585, 0.0703, 0.1367, 0.0566, 0.0703, 0.1464, 0.0546, 0.0703, 0.1425, 0.0546, 0.3535, 0.7578, 0.5000, 0.4062, 0.7656, 0.5117, 0.4121, 0.7656, 0.5117, 0.3984, 0.7578, 0.5234, 0.4023, 0.7382, 0.5390, 0.4570])), + ("int8dq", np.array([0.0546, 0.0761, 0.1386, 0.0488, 0.0644, 0.1425, 0.0605, 0.0742, 0.1406, 0.0625, 0.0722, 0.1523, 0.0625, 0.0742, 0.1503, 0.0605, 0.3886, 0.7968, 0.5507, 0.4492, 0.7890, 0.5351, 0.4316, 0.8007, 0.5390, 0.4179, 0.8281, 0.5820, 0.4531, 0.7812, 0.5703, 0.4921])), + ] + + if TorchAoConfig._is_xpu_or_cuda_capability_atleast_8_9(): + QUANTIZATION_TYPES_TO_TEST.extend([ + ("float8wo_e4m3", np.array([0.0546, 0.0722, 0.1328, 0.0468, 0.0585, 0.1367, 0.0605, 0.0703, 0.1328, 0.0625, 0.0703, 0.1445, 0.0585, 0.0703, 0.1406, 0.0605, 0.3496, 0.7109, 0.4843, 0.4042, 0.7226, 0.5000, 0.4160, 0.7031, 0.4824, 0.3886, 0.6757, 0.4667, 0.3710, 0.6679, 0.4902, 0.4238])), + ("fp5_e3m1", np.array([0.0527, 0.0762, 0.1309, 0.0449, 0.0645, 0.1328, 0.0566, 0.0723, 0.125, 0.0566, 0.0703, 0.1328, 0.0566, 0.0742, 0.1348, 0.0566, 0.3633, 0.7617, 0.5273, 0.4277, 0.7891, 0.5469, 0.4375, 0.8008, 0.5586, 0.4336, 0.7383, 0.5156, 0.3906, 0.6992, 0.5156, 0.4375])), + ]) + # fmt: on + + for quantization_name, expected_slice in QUANTIZATION_TYPES_TO_TEST: + quantization_config = TorchAoConfig(quant_type=quantization_name, modules_to_not_convert=["x_embedder"]) + self._test_quant_type(quantization_config, expected_slice) + gc.collect() + backend_empty_cache(torch_device) + backend_synchronize(torch_device) + + def test_serialization_int8wo(self): + quantization_config = TorchAoConfig("int8wo") + components = self.get_dummy_components(quantization_config) + pipe = FluxPipeline(**components) + pipe.enable_model_cpu_offload() + + weight = pipe.transformer.x_embedder.weight + self.assertTrue(isinstance(weight, AffineQuantizedTensor)) + + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0].flatten()[:128] + + with tempfile.TemporaryDirectory() as tmp_dir: + pipe.transformer.save_pretrained(tmp_dir, safe_serialization=False) + pipe.remove_all_hooks() + del pipe.transformer + gc.collect() + backend_empty_cache(torch_device) + backend_synchronize(torch_device) + transformer = FluxTransformer2DModel.from_pretrained( + tmp_dir, torch_dtype=torch.bfloat16, use_safetensors=False + ) + pipe.transformer = transformer + pipe.enable_model_cpu_offload() + + weight = transformer.x_embedder.weight + self.assertTrue(isinstance(weight, AffineQuantizedTensor)) + + loaded_output = pipe(**inputs)[0].flatten()[:128] + # Seems to require higher tolerance depending on which machine it is being run. + # A difference of 0.06 in normalized pixel space (-1 to 1), corresponds to a difference of + # 0.06 / 2 * 255 = 7.65 in pixel space (0 to 255). On our CI runners, the difference is about 0.04, + # on DGX it is 0.06, and on audace it is 0.037. So, we are using a tolerance of 0.06 here. 
+ self.assertTrue(np.allclose(output, loaded_output, atol=0.06)) + + def test_memory_footprint_int4wo(self): + # The original checkpoints are in bf16 and about 24 GB + expected_memory_in_gb = 6.0 + quantization_config = TorchAoConfig("int4wo") + cache_dir = None + transformer = FluxTransformer2DModel.from_pretrained( + "black-forest-labs/FLUX.1-dev", + subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + cache_dir=cache_dir, + ) + int4wo_memory_in_gb = get_model_size_in_bytes(transformer) / 1024**3 + self.assertTrue(int4wo_memory_in_gb < expected_memory_in_gb) + + def test_memory_footprint_int8wo(self): + # The original checkpoints are in bf16 and about 24 GB + expected_memory_in_gb = 12.0 + quantization_config = TorchAoConfig("int8wo") + cache_dir = None + transformer = FluxTransformer2DModel.from_pretrained( + "black-forest-labs/FLUX.1-dev", + subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + cache_dir=cache_dir, + ) + int8wo_memory_in_gb = get_model_size_in_bytes(transformer) / 1024**3 + self.assertTrue(int8wo_memory_in_gb < expected_memory_in_gb) + + +@require_torch +@require_torch_accelerator +@require_torchao_version_greater_or_equal("0.7.0") +@slow +@nightly +class SlowTorchAoPreserializedModelTests(unittest.TestCase): + def tearDown(self): + gc.collect() + backend_empty_cache(torch_device) + + def get_dummy_inputs(self, device: torch.device, seed: int = 0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator().manual_seed(seed) + + inputs = { + "prompt": "an astronaut riding a horse in space", + "height": 512, + "width": 512, + "num_inference_steps": 20, + "output_type": "np", + "generator": generator, + } + + return inputs + + def test_transformer_int8wo(self): + # fmt: off + expected_slice = np.array([0.0566, 0.0781, 0.1426, 0.0488, 0.0684, 0.1504, 0.0625, 0.0781, 0.1445, 0.0625, 0.0781, 0.1562, 0.0547, 0.0723, 0.1484, 0.0566, 0.5703, 0.8867, 0.7266, 0.5742, 0.875, 0.7148, 0.5586, 0.875, 0.7148, 0.5547, 0.8633, 0.7109, 0.5469, 0.8398, 0.6992, 0.5703]) + # fmt: on + + # This is just for convenience, so that we can modify it at one place for custom environments and locally testing + cache_dir = None + transformer = FluxTransformer2DModel.from_pretrained( + "hf-internal-testing/FLUX.1-Dev-TorchAO-int8wo-transformer", + torch_dtype=torch.bfloat16, + use_safetensors=False, + cache_dir=cache_dir, + ) + pipe = FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16, cache_dir=cache_dir + ) + pipe.enable_model_cpu_offload() + + # Verify that all linear layer weights are quantized + for name, module in pipe.transformer.named_modules(): + if isinstance(module, nn.Linear): + self.assertTrue(isinstance(module.weight, AffineQuantizedTensor)) + + # Verify outputs match expected slice + inputs = self.get_dummy_inputs(torch_device) + output = pipe(**inputs)[0].flatten() + output_slice = np.concatenate((output[:16], output[-16:])) + self.assertTrue(np.allclose(output_slice, expected_slice, atol=1e-3, rtol=1e-3)) diff --git a/pythonProject/diffusers-main/tests/quantization/utils.py b/pythonProject/diffusers-main/tests/quantization/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a74ece5a3a3ad27de1abc004cdb2f34ec4c28287 --- /dev/null +++ b/pythonProject/diffusers-main/tests/quantization/utils.py @@ -0,0 +1,45 @@ +from diffusers.utils import 
is_torch_available + +from ..testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_peak_memory_stats, + torch_device, +) + + +if is_torch_available(): + import torch + import torch.nn as nn + + class LoRALayer(nn.Module): + """Wraps a linear layer with LoRA-like adapter - Used for testing purposes only + + Taken from + https://github.com/huggingface/transformers/blob/566302686a71de14125717dea9a6a45b24d42b37/tests/quantization/bnb/test_4bit.py#L62C5-L78C77 + """ + + def __init__(self, module: nn.Module, rank: int): + super().__init__() + self.module = module + self.adapter = nn.Sequential( + nn.Linear(module.in_features, rank, bias=False), + nn.Linear(rank, module.out_features, bias=False), + ) + small_std = (2.0 / (5 * min(module.in_features, module.out_features))) ** 0.5 + nn.init.normal_(self.adapter[0].weight, std=small_std) + nn.init.zeros_(self.adapter[1].weight) + self.adapter.to(module.weight.device) + + def forward(self, input, *args, **kwargs): + return self.module(input, *args, **kwargs) + self.adapter(input) + + @torch.no_grad() + @torch.inference_mode() + def get_memory_consumption_stat(model, inputs): + backend_reset_peak_memory_stats(torch_device) + backend_empty_cache(torch_device) + + model(**inputs) + max_mem_allocated = backend_max_memory_allocated(torch_device) + return max_mem_allocated diff --git a/pythonProject/diffusers-main/tests/remote/__init__.py b/pythonProject/diffusers-main/tests/remote/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/remote/test_remote_decode.py b/pythonProject/diffusers-main/tests/remote/test_remote_decode.py new file mode 100644 index 0000000000000000000000000000000000000000..27170cba08356c62e51ed0a99f653691ba7a53d6 --- /dev/null +++ b/pythonProject/diffusers-main/tests/remote/test_remote_decode.py @@ -0,0 +1,537 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from typing import Tuple, Union + +import numpy as np +import PIL.Image +import torch + +from diffusers.image_processor import VaeImageProcessor +from diffusers.utils.constants import ( + DECODE_ENDPOINT_FLUX, + DECODE_ENDPOINT_HUNYUAN_VIDEO, + DECODE_ENDPOINT_SD_V1, + DECODE_ENDPOINT_SD_XL, +) +from diffusers.utils.remote_utils import ( + remote_decode, +) +from diffusers.video_processor import VideoProcessor + +from ..testing_utils import ( + enable_full_determinism, + slow, + torch_all_close, + torch_device, +) + + +enable_full_determinism() + + +class RemoteAutoencoderKLMixin: + shape: Tuple[int, ...] 
= None + out_hw: Tuple[int, int] = None + endpoint: str = None + dtype: torch.dtype = None + scaling_factor: float = None + shift_factor: float = None + processor_cls: Union[VaeImageProcessor, VideoProcessor] = None + output_pil_slice: torch.Tensor = None + output_pt_slice: torch.Tensor = None + partial_postprocess_return_pt_slice: torch.Tensor = None + return_pt_slice: torch.Tensor = None + width: int = None + height: int = None + + def get_dummy_inputs(self): + inputs = { + "endpoint": self.endpoint, + "tensor": torch.randn( + self.shape, + device=torch_device, + dtype=self.dtype, + generator=torch.Generator(torch_device).manual_seed(13), + ), + "scaling_factor": self.scaling_factor, + "shift_factor": self.shift_factor, + "height": self.height, + "width": self.width, + } + return inputs + + def test_no_scaling(self): + inputs = self.get_dummy_inputs() + if inputs["scaling_factor"] is not None: + inputs["tensor"] = inputs["tensor"] / inputs["scaling_factor"] + inputs["scaling_factor"] = None + if inputs["shift_factor"] is not None: + inputs["tensor"] = inputs["tensor"] + inputs["shift_factor"] + inputs["shift_factor"] = None + processor = self.processor_cls() + output = remote_decode( + output_type="pt", + # required for now, will be removed in next update + do_scaling=False, + processor=processor, + **inputs, + ) + self.assertTrue(isinstance(output, PIL.Image.Image), f"Expected `PIL.Image.Image` output, got {type(output)}") + self.assertEqual(output.height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output.height}") + self.assertEqual(output.width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output.width}") + output_slice = torch.from_numpy(np.array(output)[0, -3:, -3:].flatten()) + # Increased tolerance for Flux Packed diff [1, 0, 1, 0, 0, 0, 0, 0, 0] + self.assertTrue( + torch_all_close(output_slice, self.output_pt_slice.to(output_slice.dtype), rtol=1, atol=1), + f"{output_slice}", + ) + + def test_output_type_pt(self): + inputs = self.get_dummy_inputs() + processor = self.processor_cls() + output = remote_decode(output_type="pt", processor=processor, **inputs) + self.assertTrue(isinstance(output, PIL.Image.Image), f"Expected `PIL.Image.Image` output, got {type(output)}") + self.assertEqual(output.height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output.height}") + self.assertEqual(output.width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output.width}") + output_slice = torch.from_numpy(np.array(output)[0, -3:, -3:].flatten()) + self.assertTrue( + torch_all_close(output_slice, self.output_pt_slice.to(output_slice.dtype), rtol=1e-2), f"{output_slice}" + )
+
+ # output is visually the same, slice is flaky?
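# A 3x3 corner slice can legitimately differ across backends while the decoded image
# is visually identical, which is likely why the comment above flags flakiness. A
# whole-image tolerance is one more stable alternative; the helper below is a sketch,
# not part of the original suite (`_images_close` and its threshold are hypothetical),
# and assumes the `np` import already present at the top of this file:
def _images_close(img_a, img_b, max_mean_abs_diff=2.0):
    """Compare two PIL images by mean absolute pixel difference instead of a 3x3 slice."""
    a = np.asarray(img_a, dtype=np.float32)
    b = np.asarray(img_b, dtype=np.float32)
    return bool(np.abs(a - b).mean() <= max_mean_abs_diff)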
+ def test_output_type_pil(self): + inputs = self.get_dummy_inputs() + output = remote_decode(output_type="pil", **inputs) + self.assertTrue(isinstance(output, PIL.Image.Image), f"Expected `PIL.Image.Image` output, got {type(output)}") + self.assertEqual(output.height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output.height}") + self.assertEqual(output.width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output.width}") + + def test_output_type_pil_image_format(self): + inputs = self.get_dummy_inputs() + output = remote_decode(output_type="pil", image_format="png", **inputs) + self.assertTrue(isinstance(output, PIL.Image.Image), f"Expected `PIL.Image.Image` output, got {type(output)}") + self.assertEqual(output.height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output.height}") + self.assertEqual(output.width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output.width}") + self.assertEqual(output.format, "png", f"Expected image format `png`, got {output.format}") + output_slice = torch.from_numpy(np.array(output)[0, -3:, -3:].flatten()) + self.assertTrue( + torch_all_close(output_slice, self.output_pt_slice.to(output_slice.dtype), rtol=1e-2), f"{output_slice}" + ) + + def test_output_type_pt_partial_postprocess(self): + inputs = self.get_dummy_inputs() + output = remote_decode(output_type="pt", partial_postprocess=True, **inputs) + self.assertTrue(isinstance(output, PIL.Image.Image), f"Expected `PIL.Image.Image` output, got {type(output)}") + self.assertEqual(output.height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output.height}") + self.assertEqual(output.width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output.width}") + output_slice = torch.from_numpy(np.array(output)[0, -3:, -3:].flatten()) + self.assertTrue( + torch_all_close(output_slice, self.output_pt_slice.to(output_slice.dtype), rtol=1e-2), f"{output_slice}" + ) + + def test_output_type_pt_return_type_pt(self): + inputs = self.get_dummy_inputs() + output = remote_decode(output_type="pt", return_type="pt", **inputs) + self.assertTrue(isinstance(output, torch.Tensor), f"Expected `torch.Tensor` output, got {type(output)}") + self.assertEqual( + output.shape[2], self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output.shape[2]}" + ) + self.assertEqual( + output.shape[3], self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output.shape[3]}" + ) + output_slice = output[0, 0, -3:, -3:].flatten() + self.assertTrue( + torch_all_close(output_slice, self.return_pt_slice.to(output_slice.dtype), rtol=1e-3, atol=1e-3), + f"{output_slice}", + ) + + def test_output_type_pt_partial_postprocess_return_type_pt(self): + inputs = self.get_dummy_inputs() + output = remote_decode(output_type="pt", partial_postprocess=True, return_type="pt", **inputs) + self.assertTrue(isinstance(output, torch.Tensor), f"Expected `torch.Tensor` output, got {type(output)}") + self.assertEqual( + output.shape[1], self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output.shape[1]}" + ) + self.assertEqual( + output.shape[2], self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output.shape[2]}" + ) + output_slice = output[0, -3:, -3:, 0].flatten().cpu() + self.assertTrue( + torch_all_close(output_slice, self.partial_postprocess_return_pt_slice.to(output_slice.dtype), rtol=1e-2), + f"{output_slice}", + ) + + def test_do_scaling_deprecation(self): + inputs = self.get_dummy_inputs() + inputs.pop("scaling_factor", None) +
inputs.pop("shift_factor", None) + with self.assertWarns(FutureWarning) as warning: + _ = remote_decode(output_type="pt", partial_postprocess=True, **inputs) + self.assertEqual( + str(warning.warnings[0].message), + "`do_scaling` is deprecated, pass `scaling_factor` and `shift_factor` if required.", + str(warning.warnings[0].message), + ) + + def test_input_tensor_type_base64_deprecation(self): + inputs = self.get_dummy_inputs() + with self.assertWarns(FutureWarning) as warning: + _ = remote_decode(output_type="pt", input_tensor_type="base64", partial_postprocess=True, **inputs) + self.assertEqual( + str(warning.warnings[0].message), + "input_tensor_type='base64' is deprecated. Using `binary`.", + str(warning.warnings[0].message), + ) + + def test_output_tensor_type_base64_deprecation(self): + inputs = self.get_dummy_inputs() + with self.assertWarns(FutureWarning) as warning: + _ = remote_decode(output_type="pt", output_tensor_type="base64", partial_postprocess=True, **inputs) + self.assertEqual( + str(warning.warnings[0].message), + "output_tensor_type='base64' is deprecated. Using `binary`.", + str(warning.warnings[0].message), + ) + + +class RemoteAutoencoderKLHunyuanVideoMixin(RemoteAutoencoderKLMixin): + def test_no_scaling(self): + inputs = self.get_dummy_inputs() + if inputs["scaling_factor"] is not None: + inputs["tensor"] = inputs["tensor"] / inputs["scaling_factor"] + inputs["scaling_factor"] = None + if inputs["shift_factor"] is not None: + inputs["tensor"] = inputs["tensor"] + inputs["shift_factor"] + inputs["shift_factor"] = None + processor = self.processor_cls() + output = remote_decode( + output_type="pt", + # required for now, will be removed in next update + do_scaling=False, + processor=processor, + **inputs, + ) + self.assertTrue( + isinstance(output, list) and isinstance(output[0], PIL.Image.Image), + f"Expected `List[PIL.Image.Image]` output, got {type(output)}", + ) + self.assertEqual( + output[0].height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output[0].height}" + ) + self.assertEqual( + output[0].width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output[0].width}" + ) + output_slice = torch.from_numpy(np.array(output[0])[0, -3:, -3:].flatten()) + self.assertTrue( + torch_all_close(output_slice, self.output_pt_slice.to(output_slice.dtype), rtol=1, atol=1), + f"{output_slice}", + ) + + def test_output_type_pt(self): + inputs = self.get_dummy_inputs() + processor = self.processor_cls() + output = remote_decode(output_type="pt", processor=processor, **inputs) + self.assertTrue( + isinstance(output, list) and isinstance(output[0], PIL.Image.Image), + f"Expected `List[PIL.Image.Image]` output, got {type(output)}", + ) + self.assertEqual( + output[0].height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output[0].height}" + ) + self.assertEqual( + output[0].width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output[0].width}" + ) + output_slice = torch.from_numpy(np.array(output[0])[0, -3:, -3:].flatten()) + self.assertTrue( + torch_all_close(output_slice, self.output_pt_slice.to(output_slice.dtype), rtol=1, atol=1), + f"{output_slice}", + ) + + # output is visually the same, slice is flaky?
+ def test_output_type_pil(self): + inputs = self.get_dummy_inputs() + processor = self.processor_cls() + output = remote_decode(output_type="pil", processor=processor, **inputs) + self.assertTrue( + isinstance(output, list) and isinstance(output[0], PIL.Image.Image), + f"Expected `List[PIL.Image.Image]` output, got {type(output)}", + ) + self.assertEqual( + output[0].height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output[0].height}" + ) + self.assertEqual( + output[0].width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output[0].width}" + ) + + def test_output_type_pil_image_format(self): + inputs = self.get_dummy_inputs() + processor = self.processor_cls() + output = remote_decode(output_type="pil", processor=processor, image_format="png", **inputs) + self.assertTrue( + isinstance(output, list) and isinstance(output[0], PIL.Image.Image), + f"Expected `List[PIL.Image.Image]` output, got {type(output)}", + ) + self.assertEqual( + output[0].height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output[0].height}" + ) + self.assertEqual( + output[0].width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output[0].width}" + ) + output_slice = torch.from_numpy(np.array(output[0])[0, -3:, -3:].flatten()) + self.assertTrue( + torch_all_close(output_slice, self.output_pt_slice.to(output_slice.dtype), rtol=1, atol=1), + f"{output_slice}", + ) + + def test_output_type_pt_partial_postprocess(self): + inputs = self.get_dummy_inputs() + output = remote_decode(output_type="pt", partial_postprocess=True, **inputs) + self.assertTrue( + isinstance(output, list) and isinstance(output[0], PIL.Image.Image), + f"Expected `List[PIL.Image.Image]` output, got {type(output)}", + ) + self.assertEqual( + output[0].height, self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output[0].height}" + ) + self.assertEqual( + output[0].width, self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output[0].width}" + ) + output_slice = torch.from_numpy(np.array(output[0])[0, -3:, -3:].flatten()) + self.assertTrue( + torch_all_close(output_slice, self.output_pt_slice.to(output_slice.dtype), rtol=1, atol=1), + f"{output_slice}", + ) + + def test_output_type_pt_return_type_pt(self): + inputs = self.get_dummy_inputs() + output = remote_decode(output_type="pt", return_type="pt", **inputs) + self.assertTrue(isinstance(output, torch.Tensor), f"Expected `torch.Tensor` output, got {type(output)}") + self.assertEqual( + output.shape[3], self.out_hw[0], f"Expected image height {self.out_hw[0]}, got {output.shape[3]}" + ) + self.assertEqual( + output.shape[4], self.out_hw[1], f"Expected image width {self.out_hw[1]}, got {output.shape[4]}" + ) + output_slice = output[0, 0, 0, -3:, -3:].flatten() + self.assertTrue( + torch_all_close(output_slice, self.return_pt_slice.to(output_slice.dtype), rtol=1e-3, atol=1e-3), + f"{output_slice}", + ) + + def test_output_type_mp4(self): + inputs = self.get_dummy_inputs() + output = remote_decode(output_type="mp4", return_type="mp4", **inputs) + self.assertTrue(isinstance(output, bytes), f"Expected `bytes` output, got {type(output)}") + + +class RemoteAutoencoderKLSDv1Tests( + RemoteAutoencoderKLMixin, + unittest.TestCase, +): + shape = ( + 1, + 4, + 64, + 64, + ) + out_hw = ( + 512, + 512, + ) + endpoint = DECODE_ENDPOINT_SD_V1 + dtype = torch.float16 + scaling_factor = 0.18215 + shift_factor = None + processor_cls = VaeImageProcessor + output_pt_slice = torch.tensor([31, 15, 11, 55, 30, 21, 66, 42, 30],
dtype=torch.uint8) + partial_postprocess_return_pt_slice = torch.tensor([100, 130, 99, 133, 106, 112, 97, 100, 121], dtype=torch.uint8) + return_pt_slice = torch.tensor([-0.2177, 0.0217, -0.2258, 0.0412, -0.1687, -0.1232, -0.2416, -0.2130, -0.0543]) + + +class RemoteAutoencoderKLSDXLTests( + RemoteAutoencoderKLMixin, + unittest.TestCase, +): + shape = ( + 1, + 4, + 128, + 128, + ) + out_hw = ( + 1024, + 1024, + ) + endpoint = DECODE_ENDPOINT_SD_XL + dtype = torch.float16 + scaling_factor = 0.13025 + shift_factor = None + processor_cls = VaeImageProcessor + output_pt_slice = torch.tensor([104, 52, 23, 114, 61, 35, 108, 87, 38], dtype=torch.uint8) + partial_postprocess_return_pt_slice = torch.tensor([77, 86, 89, 49, 60, 75, 52, 65, 78], dtype=torch.uint8) + return_pt_slice = torch.tensor([-0.3945, -0.3289, -0.2993, -0.6177, -0.5259, -0.4119, -0.5898, -0.4863, -0.3845]) + + +class RemoteAutoencoderKLFluxTests( + RemoteAutoencoderKLMixin, + unittest.TestCase, +): + shape = ( + 1, + 16, + 128, + 128, + ) + out_hw = ( + 1024, + 1024, + ) + endpoint = DECODE_ENDPOINT_FLUX + dtype = torch.bfloat16 + scaling_factor = 0.3611 + shift_factor = 0.1159 + processor_cls = VaeImageProcessor + output_pt_slice = torch.tensor([110, 72, 91, 62, 35, 52, 69, 55, 69], dtype=torch.uint8) + partial_postprocess_return_pt_slice = torch.tensor( + [202, 203, 203, 197, 195, 193, 189, 188, 178], dtype=torch.uint8 + ) + return_pt_slice = torch.tensor([0.5820, 0.5962, 0.5898, 0.5439, 0.5327, 0.5112, 0.4797, 0.4773, 0.3984]) + + +class RemoteAutoencoderKLFluxPackedTests( + RemoteAutoencoderKLMixin, + unittest.TestCase, +): + shape = ( + 1, + 4096, + 64, + ) + out_hw = ( + 1024, + 1024, + ) + height = 1024 + width = 1024 + endpoint = DECODE_ENDPOINT_FLUX + dtype = torch.bfloat16 + scaling_factor = 0.3611 + shift_factor = 0.1159 + processor_cls = VaeImageProcessor + # slices are different due to randn on different shape. 
we can pack the latent instead if we want the same + output_pt_slice = torch.tensor([96, 116, 157, 45, 67, 104, 34, 56, 89], dtype=torch.uint8) + partial_postprocess_return_pt_slice = torch.tensor( + [168, 212, 202, 155, 191, 185, 150, 180, 168], dtype=torch.uint8 + ) + return_pt_slice = torch.tensor([0.3198, 0.6631, 0.5864, 0.2131, 0.4944, 0.4482, 0.1776, 0.4153, 0.3176]) + + +class RemoteAutoencoderKLHunyuanVideoTests( + RemoteAutoencoderKLHunyuanVideoMixin, + unittest.TestCase, +): + shape = ( + 1, + 16, + 3, + 40, + 64, + ) + out_hw = ( + 320, + 512, + ) + endpoint = DECODE_ENDPOINT_HUNYUAN_VIDEO + dtype = torch.float16 + scaling_factor = 0.476986 + processor_cls = VideoProcessor + output_pt_slice = torch.tensor([112, 92, 85, 112, 93, 85, 112, 94, 85], dtype=torch.uint8) + partial_postprocess_return_pt_slice = torch.tensor( + [149, 161, 168, 136, 150, 156, 129, 143, 149], dtype=torch.uint8 + ) + return_pt_slice = torch.tensor([0.1656, 0.2661, 0.3157, 0.0693, 0.1755, 0.2252, 0.0127, 0.1221, 0.1708]) + + +class RemoteAutoencoderKLSlowTestMixin: + channels: int = 4 + endpoint: str = None + dtype: torch.dtype = None + scaling_factor: float = None + shift_factor: float = None + width: int = None + height: int = None + + def get_dummy_inputs(self): + inputs = { + "endpoint": self.endpoint, + "scaling_factor": self.scaling_factor, + "shift_factor": self.shift_factor, + "height": self.height, + "width": self.width, + } + return inputs + + def test_multi_res(self): + inputs = self.get_dummy_inputs() + for height in {320, 512, 640, 704, 896, 1024, 1208, 1384, 1536, 1608, 1864, 2048}: + for width in {320, 512, 640, 704, 896, 1024, 1208, 1384, 1536, 1608, 1864, 2048}: + inputs["tensor"] = torch.randn( + (1, self.channels, height // 8, width // 8), + device=torch_device, + dtype=self.dtype, + generator=torch.Generator(torch_device).manual_seed(13), + ) + inputs["height"] = height + inputs["width"] = width + output = remote_decode(output_type="pt", partial_postprocess=True, **inputs) + output.save(f"test_multi_res_{height}_{width}.png") + + +@slow +class RemoteAutoencoderKLSDv1SlowTests( + RemoteAutoencoderKLSlowTestMixin, + unittest.TestCase, +): + endpoint = DECODE_ENDPOINT_SD_V1 + dtype = torch.float16 + scaling_factor = 0.18215 + shift_factor = None + + +@slow +class RemoteAutoencoderKLSDXLSlowTests( + RemoteAutoencoderKLSlowTestMixin, + unittest.TestCase, +): + endpoint = DECODE_ENDPOINT_SD_XL + dtype = torch.float16 + scaling_factor = 0.13025 + shift_factor = None + + +@slow +class RemoteAutoencoderKLFluxSlowTests( + RemoteAutoencoderKLSlowTestMixin, + unittest.TestCase, +): + channels = 16 + endpoint = DECODE_ENDPOINT_FLUX + dtype = torch.bfloat16 + scaling_factor = 0.3611 + shift_factor = 0.1159 diff --git a/pythonProject/diffusers-main/tests/remote/test_remote_encode.py b/pythonProject/diffusers-main/tests/remote/test_remote_encode.py new file mode 100644 index 0000000000000000000000000000000000000000..4c0daf08fd8cde87997cb2187862bee898409dbe --- /dev/null +++ b/pythonProject/diffusers-main/tests/remote/test_remote_encode.py @@ -0,0 +1,225 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import PIL.Image +import torch + +from diffusers.utils import load_image +from diffusers.utils.constants import ( + DECODE_ENDPOINT_FLUX, + DECODE_ENDPOINT_SD_V1, + DECODE_ENDPOINT_SD_XL, + ENCODE_ENDPOINT_FLUX, + ENCODE_ENDPOINT_SD_V1, + ENCODE_ENDPOINT_SD_XL, +) +from diffusers.utils.remote_utils import ( + remote_decode, + remote_encode, +) + +from ..testing_utils import ( + enable_full_determinism, + slow, +) + + +enable_full_determinism() + +IMAGE = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg?download=true" + + +class RemoteAutoencoderKLEncodeMixin: + channels: int = None + endpoint: str = None + decode_endpoint: str = None + dtype: torch.dtype = None + scaling_factor: float = None + shift_factor: float = None + image: PIL.Image.Image = None + + def get_dummy_inputs(self): + if self.image is None: + self.image = load_image(IMAGE) + inputs = { + "endpoint": self.endpoint, + "image": self.image, + "scaling_factor": self.scaling_factor, + "shift_factor": self.shift_factor, + } + return inputs + + def test_image_input(self): + inputs = self.get_dummy_inputs() + height, width = inputs["image"].height, inputs["image"].width + output = remote_encode(**inputs) + self.assertEqual(list(output.shape), [1, self.channels, height // 8, width // 8]) + decoded = remote_decode( + tensor=output, + endpoint=self.decode_endpoint, + scaling_factor=self.scaling_factor, + shift_factor=self.shift_factor, + image_format="png", + ) + self.assertEqual(decoded.height, height) + self.assertEqual(decoded.width, width) + # image_slice = torch.from_numpy(np.array(inputs["image"])[0, -3:, -3:].flatten()) + # decoded_slice = torch.from_numpy(np.array(decoded)[0, -3:, -3:].flatten()) + # TODO: how to test this? encode->decode is lossy. expected slice of encoded latent? 
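# One possible answer to the TODO above: since encode->decode is lossy, assert a loose
# perceptual bound over the whole image rather than an exact slice. The helper below is
# only a sketch, not part of the original suite; the 20 dB threshold is an assumption
# that would need tuning per endpoint, and numpy is imported here because this file
# does not import it at the top.
import numpy as np


def _psnr(img_a, img_b):
    """Peak signal-to-noise ratio between two same-sized PIL images, in dB."""
    a = np.asarray(img_a, dtype=np.float64)
    b = np.asarray(img_b, dtype=np.float64)
    mse = np.mean((a - b) ** 2)
    return float("inf") if mse == 0 else 10.0 * np.log10((255.0**2) / mse)


# Possible usage inside test_image_input:
#     self.assertGreater(_psnr(inputs["image"], decoded), 20.0)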
+ + +class RemoteAutoencoderKLSDv1Tests( + RemoteAutoencoderKLEncodeMixin, + unittest.TestCase, +): + channels = 4 + endpoint = ENCODE_ENDPOINT_SD_V1 + decode_endpoint = DECODE_ENDPOINT_SD_V1 + dtype = torch.float16 + scaling_factor = 0.18215 + shift_factor = None + + +class RemoteAutoencoderKLSDXLTests( + RemoteAutoencoderKLEncodeMixin, + unittest.TestCase, +): + channels = 4 + endpoint = ENCODE_ENDPOINT_SD_XL + decode_endpoint = DECODE_ENDPOINT_SD_XL + dtype = torch.float16 + scaling_factor = 0.13025 + shift_factor = None + + +class RemoteAutoencoderKLFluxTests( + RemoteAutoencoderKLEncodeMixin, + unittest.TestCase, +): + channels = 16 + endpoint = ENCODE_ENDPOINT_FLUX + decode_endpoint = DECODE_ENDPOINT_FLUX + dtype = torch.bfloat16 + scaling_factor = 0.3611 + shift_factor = 0.1159 + + +class RemoteAutoencoderKLEncodeSlowTestMixin: + channels: int = 4 + endpoint: str = None + decode_endpoint: str = None + dtype: torch.dtype = None + scaling_factor: float = None + shift_factor: float = None + image: PIL.Image.Image = None + + def get_dummy_inputs(self): + if self.image is None: + self.image = load_image(IMAGE) + inputs = { + "endpoint": self.endpoint, + "image": self.image, + "scaling_factor": self.scaling_factor, + "shift_factor": self.shift_factor, + } + return inputs + + def test_multi_res(self): + inputs = self.get_dummy_inputs() + for height in { + 320, + 512, + 640, + 704, + 896, + 1024, + 1208, + 1384, + 1536, + 1608, + 1864, + 2048, + }: + for width in { + 320, + 512, + 640, + 704, + 896, + 1024, + 1208, + 1384, + 1536, + 1608, + 1864, + 2048, + }: + inputs["image"] = inputs["image"].resize( + ( + width, + height, + ) + ) + output = remote_encode(**inputs) + self.assertEqual(list(output.shape), [1, self.channels, height // 8, width // 8]) + decoded = remote_decode( + tensor=output, + endpoint=self.decode_endpoint, + scaling_factor=self.scaling_factor, + shift_factor=self.shift_factor, + image_format="png", + ) + self.assertEqual(decoded.height, height) + self.assertEqual(decoded.width, width) + decoded.save(f"test_multi_res_{height}_{width}.png") + + +@slow +class RemoteAutoencoderKLSDv1SlowTests( + RemoteAutoencoderKLEncodeSlowTestMixin, + unittest.TestCase, +): + endpoint = ENCODE_ENDPOINT_SD_V1 + decode_endpoint = DECODE_ENDPOINT_SD_V1 + dtype = torch.float16 + scaling_factor = 0.18215 + shift_factor = None + + +@slow +class RemoteAutoencoderKLSDXLSlowTests( + RemoteAutoencoderKLEncodeSlowTestMixin, + unittest.TestCase, +): + endpoint = ENCODE_ENDPOINT_SD_XL + decode_endpoint = DECODE_ENDPOINT_SD_XL + dtype = torch.float16 + scaling_factor = 0.13025 + shift_factor = None + + +@slow +class RemoteAutoencoderKLFluxSlowTests( + RemoteAutoencoderKLEncodeSlowTestMixin, + unittest.TestCase, +): + channels = 16 + endpoint = ENCODE_ENDPOINT_FLUX + decode_endpoint = DECODE_ENDPOINT_FLUX + dtype = torch.bfloat16 + scaling_factor = 0.3611 + shift_factor = 0.1159 diff --git a/pythonProject/diffusers-main/tests/schedulers/__init__.py b/pythonProject/diffusers-main/tests/schedulers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_consistency_model.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_consistency_model.py new file mode 100644 index 0000000000000000000000000000000000000000..4f773d7db05f752d2b1e216071af5a1ca808d658 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_consistency_model.py @@ 
-0,0 +1,189 @@ +import torch + +from diffusers import CMStochasticIterativeScheduler + +from .test_schedulers import SchedulerCommonTest + + +class CMStochasticIterativeSchedulerTest(SchedulerCommonTest): + scheduler_classes = (CMStochasticIterativeScheduler,) + num_inference_steps = 10 + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 201, + "sigma_min": 0.002, + "sigma_max": 80.0, + } + + config.update(**kwargs) + return config + + # Override test_step_shape to add CMStochasticIterativeScheduler-specific logic regarding timesteps + # Problem is that we don't know two timesteps that will always be in the timestep schedule from only the scheduler + # config; scaled sigma_max is always in the timestep schedule, but sigma_min is in the sigma schedule while scaled + # sigma_min is not in the timestep schedule + def test_step_shape(self): + num_inference_steps = 10 + + scheduler_config = self.get_scheduler_config() + scheduler = self.scheduler_classes[0](**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + + timestep_0 = scheduler.timesteps[0] + timestep_1 = scheduler.timesteps[1] + + sample = self.dummy_sample + residual = 0.1 * sample + + output_0 = scheduler.step(residual, timestep_0, sample).prev_sample + output_1 = scheduler.step(residual, timestep_1, sample).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_clip_denoised(self): + for clip_denoised in [True, False]: + self.check_over_configs(clip_denoised=clip_denoised) + + def test_full_loop_no_noise_onestep(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 1 + scheduler.set_timesteps(num_inference_steps) + timesteps = scheduler.timesteps + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + for i, t in enumerate(timesteps): + # 1. scale model input + scaled_sample = scheduler.scale_model_input(sample, t) + + # 2. predict noise residual + residual = model(scaled_sample, t) + + # 3. predict previous sample x_t-1 + pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + + sample = pred_prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 192.7614) < 1e-2 + assert abs(result_mean.item() - 0.2510) < 1e-3 + + def test_full_loop_no_noise_multistep(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [106, 0] + scheduler.set_timesteps(timesteps=timesteps) + timesteps = scheduler.timesteps + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + for t in timesteps: + # 1. scale model input + scaled_sample = scheduler.scale_model_input(sample, t) + + # 2. predict noise residual + residual = model(scaled_sample, t) + + # 3. 
predict previous sample x_t-1 + pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + + sample = pred_prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 347.6357) < 1e-2 + assert abs(result_mean.item() - 0.4527) < 1e-3 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + t_start = 8 + + scheduler.set_timesteps(num_inference_steps) + timesteps = scheduler.timesteps + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for t in timesteps: + # 1. scale model input + scaled_sample = scheduler.scale_model_input(sample, t) + + # 2. predict noise residual + residual = model(scaled_sample, t) + + # 3. predict previous sample x_t-1 + pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + + sample = pred_prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 763.9186) < 1e-2, f"expected result sum 763.9186, but got {result_sum}" + assert abs(result_mean.item() - 0.9947) < 1e-3, f"expected result mean 0.9947, but got {result_mean}" + + def test_custom_timesteps_increasing_order(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [39, 30, 12, 15, 0] + + with self.assertRaises(ValueError, msg="`timesteps` must be in descending order."): + scheduler.set_timesteps(timesteps=timesteps) + + def test_custom_timesteps_passing_both_num_inference_steps_and_timesteps(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [39, 30, 12, 1, 0] + num_inference_steps = len(timesteps) + + with self.assertRaises(ValueError, msg="Can only pass one of `num_inference_steps` or `timesteps`."): + scheduler.set_timesteps(num_inference_steps=num_inference_steps, timesteps=timesteps) + + def test_custom_timesteps_too_large(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [scheduler.config.num_train_timesteps] + + with self.assertRaises( + ValueError, + msg=f"`timesteps` must start before `self.config.train_timesteps`: {scheduler.config.num_train_timesteps}", + ): + scheduler.set_timesteps(timesteps=timesteps) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddim.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddim.py new file mode 100644 index 0000000000000000000000000000000000000000..13b353a44b0885ed52eda0ce97d3ca7bd4b18ad5 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddim.py @@ -0,0 +1,176 @@ +import torch + +from diffusers import DDIMScheduler + +from .test_schedulers import SchedulerCommonTest + + +class DDIMSchedulerTest(SchedulerCommonTest): + scheduler_classes = (DDIMScheduler,) + forward_default_kwargs = (("eta", 0.0), ("num_inference_steps", 50)) + + def
get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "clip_sample": True, + } + + config.update(**kwargs) + return config + + def full_loop(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps, eta = 10, 0.0 + + model = self.dummy_model() + sample = self.dummy_sample_deter + + scheduler.set_timesteps(num_inference_steps) + + for t in scheduler.timesteps: + residual = model(sample, t) + sample = scheduler.step(residual, t, sample, eta).prev_sample + + return sample + + def test_timesteps(self): + for timesteps in [100, 500, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_steps_offset(self): + for steps_offset in [0, 1]: + self.check_over_configs(steps_offset=steps_offset) + + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(steps_offset=1) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(5) + assert torch.equal(scheduler.timesteps, torch.LongTensor([801, 601, 401, 201, 1])) + + def test_betas(self): + for beta_start, beta_end in zip([0.0001, 0.001, 0.01, 0.1], [0.002, 0.02, 0.2, 2]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "squaredcos_cap_v2"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_clip_sample(self): + for clip_sample in [True, False]: + self.check_over_configs(clip_sample=clip_sample) + + def test_timestep_spacing(self): + for timestep_spacing in ["trailing", "leading"]: + self.check_over_configs(timestep_spacing=timestep_spacing) + + def test_rescale_betas_zero_snr(self): + for rescale_betas_zero_snr in [True, False]: + self.check_over_configs(rescale_betas_zero_snr=rescale_betas_zero_snr) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + ) + + def test_time_indices(self): + for t in [1, 10, 49]: + self.check_over_forward(time_step=t) + + def test_inference_steps(self): + for t, num_inference_steps in zip([1, 10, 50], [10, 50, 500]): + self.check_over_forward(time_step=t, num_inference_steps=num_inference_steps) + + def test_eta(self): + for t, eta in zip([1, 10, 49], [0.0, 0.5, 1.0]): + self.check_over_forward(time_step=t, eta=eta) + + def test_variance(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + assert torch.sum(torch.abs(scheduler._get_variance(0, 0) - 0.0)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(420, 400) - 0.14771)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(980, 960) - 0.32460)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(0, 0) - 0.0)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(487, 486) - 0.00979)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(999, 998) - 0.02)) < 1e-5 + + def test_full_loop_no_noise(self): + sample = self.full_loop() + + result_sum = 
torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 172.0067) < 1e-2 + assert abs(result_mean.item() - 0.223967) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 52.5302) < 1e-2 + assert abs(result_mean.item() - 0.0684) < 1e-3 + + def test_full_loop_with_set_alpha_to_one(self): + # We specify different beta, so that the first alpha is 0.99 + sample = self.full_loop(set_alpha_to_one=True, beta_start=0.01) + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 149.8295) < 1e-2 + assert abs(result_mean.item() - 0.1951) < 1e-3 + + def test_full_loop_with_no_set_alpha_to_one(self): + # We specify different beta, so that the first alpha is 0.99 + sample = self.full_loop(set_alpha_to_one=False, beta_start=0.01) + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 149.0784) < 1e-2 + assert abs(result_mean.item() - 0.1941) < 1e-3 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps, eta = 10, 0.0 + t_start = 8 + + model = self.dummy_model() + sample = self.dummy_sample_deter + + scheduler.set_timesteps(num_inference_steps) + + # add noise + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for t in timesteps: + residual = model(sample, t) + sample = scheduler.step(residual, t, sample, eta).prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 354.5418) < 1e-2, f"expected result sum 354.5418, but got {result_sum}" + assert abs(result_mean.item() - 0.4616) < 1e-3, f"expected result mean 0.4616, but got {result_mean}" diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddim_inverse.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddim_inverse.py new file mode 100644 index 0000000000000000000000000000000000000000..81d53f1b477850f556ea8cf5d669698ebca295b0 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddim_inverse.py @@ -0,0 +1,138 @@ +import unittest + +import torch + +from diffusers import DDIMInverseScheduler + +from .test_schedulers import SchedulerCommonTest + + +class DDIMInverseSchedulerTest(SchedulerCommonTest): + scheduler_classes = (DDIMInverseScheduler,) + forward_default_kwargs = (("num_inference_steps", 50),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "clip_sample": True, + } + + config.update(**kwargs) + return config + + def full_loop(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + + model = self.dummy_model() + sample = self.dummy_sample_deter + + scheduler.set_timesteps(num_inference_steps) + + for t in scheduler.timesteps: + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + +
return sample + + def test_timesteps(self): + for timesteps in [100, 500, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_steps_offset(self): + for steps_offset in [0, 1]: + self.check_over_configs(steps_offset=steps_offset) + + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(steps_offset=1) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(5) + assert torch.equal(scheduler.timesteps, torch.LongTensor([1, 201, 401, 601, 801])) + + def test_betas(self): + for beta_start, beta_end in zip([0.0001, 0.001, 0.01, 0.1], [0.002, 0.02, 0.2, 2]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "squaredcos_cap_v2"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_clip_sample(self): + for clip_sample in [True, False]: + self.check_over_configs(clip_sample=clip_sample) + + def test_timestep_spacing(self): + for timestep_spacing in ["trailing", "leading"]: + self.check_over_configs(timestep_spacing=timestep_spacing) + + def test_rescale_betas_zero_snr(self): + for rescale_betas_zero_snr in [True, False]: + self.check_over_configs(rescale_betas_zero_snr=rescale_betas_zero_snr) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + ) + + def test_time_indices(self): + for t in [1, 10, 49]: + self.check_over_forward(time_step=t) + + def test_inference_steps(self): + for t, num_inference_steps in zip([1, 10, 50], [10, 50, 500]): + self.check_over_forward(time_step=t, num_inference_steps=num_inference_steps) + + @unittest.skip("Test not supported.") + def test_add_noise_device(self): + pass + + def test_full_loop_no_noise(self): + sample = self.full_loop() + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 671.6816) < 1e-2 + assert abs(result_mean.item() - 0.8746) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 1394.2185) < 1e-2 + assert abs(result_mean.item() - 1.8154) < 1e-3 + + def test_full_loop_with_set_alpha_to_one(self): + # We specify different beta, so that the first alpha is 0.99 + sample = self.full_loop(set_alpha_to_one=True, beta_start=0.01) + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 539.9622) < 1e-2 + assert abs(result_mean.item() - 0.7031) < 1e-3 + + def test_full_loop_with_no_set_alpha_to_one(self): + # We specify different beta, so that the first alpha is 0.99 + sample = self.full_loop(set_alpha_to_one=False, beta_start=0.01) + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 542.6722) < 1e-2 + assert abs(result_mean.item() - 0.7066) < 1e-3 diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddim_parallel.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddim_parallel.py new file 
mode 100644 index 0000000000000000000000000000000000000000..3ce8034cfb952bc1d1e39c4e37621105e0d453a2 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddim_parallel.py @@ -0,0 +1,216 @@ +# Copyright 2025 ParaDiGMS authors and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from diffusers import DDIMParallelScheduler + +from .test_schedulers import SchedulerCommonTest + + +class DDIMParallelSchedulerTest(SchedulerCommonTest): + scheduler_classes = (DDIMParallelScheduler,) + forward_default_kwargs = (("eta", 0.0), ("num_inference_steps", 50)) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "clip_sample": True, + } + + config.update(**kwargs) + return config + + def full_loop(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps, eta = 10, 0.0 + + model = self.dummy_model() + sample = self.dummy_sample_deter + + scheduler.set_timesteps(num_inference_steps) + + for t in scheduler.timesteps: + residual = model(sample, t) + sample = scheduler.step(residual, t, sample, eta).prev_sample + + return sample + + def test_timesteps(self): + for timesteps in [100, 500, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_steps_offset(self): + for steps_offset in [0, 1]: + self.check_over_configs(steps_offset=steps_offset) + + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(steps_offset=1) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(5) + assert torch.equal(scheduler.timesteps, torch.LongTensor([801, 601, 401, 201, 1])) + + def test_betas(self): + for beta_start, beta_end in zip([0.0001, 0.001, 0.01, 0.1], [0.002, 0.02, 0.2, 2]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "squaredcos_cap_v2"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_clip_sample(self): + for clip_sample in [True, False]: + self.check_over_configs(clip_sample=clip_sample) + + def test_timestep_spacing(self): + for timestep_spacing in ["trailing", "leading"]: + self.check_over_configs(timestep_spacing=timestep_spacing) + + def test_rescale_betas_zero_snr(self): + for rescale_betas_zero_snr in [True, False]: + self.check_over_configs(rescale_betas_zero_snr=rescale_betas_zero_snr) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + 
sample_max_value=threshold, + ) + + def test_time_indices(self): + for t in [1, 10, 49]: + self.check_over_forward(time_step=t) + + def test_inference_steps(self): + for t, num_inference_steps in zip([1, 10, 50], [10, 50, 500]): + self.check_over_forward(time_step=t, num_inference_steps=num_inference_steps) + + def test_eta(self): + for t, eta in zip([1, 10, 49], [0.0, 0.5, 1.0]): + self.check_over_forward(time_step=t, eta=eta) + + def test_variance(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + assert torch.sum(torch.abs(scheduler._get_variance(0, 0) - 0.0)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(420, 400) - 0.14771)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(980, 960) - 0.32460)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(0, 0) - 0.0)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(487, 486) - 0.00979)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(999, 998) - 0.02)) < 1e-5 + + def test_batch_step_no_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps, eta = 10, 0.0 + scheduler.set_timesteps(num_inference_steps) + + model = self.dummy_model() + sample1 = self.dummy_sample_deter + sample2 = self.dummy_sample_deter + 0.1 + sample3 = self.dummy_sample_deter - 0.1 + + per_sample_batch = sample1.shape[0] + samples = torch.stack([sample1, sample2, sample3], dim=0) + timesteps = torch.arange(num_inference_steps)[0:3, None].repeat(1, per_sample_batch) + + residual = model(samples.flatten(0, 1), timesteps.flatten(0, 1)) + pred_prev_sample = scheduler.batch_step_no_noise(residual, timesteps.flatten(0, 1), samples.flatten(0, 1), eta) + + result_sum = torch.sum(torch.abs(pred_prev_sample)) + result_mean = torch.mean(torch.abs(pred_prev_sample)) + + assert abs(result_sum.item() - 1147.7904) < 1e-2 + assert abs(result_mean.item() - 0.4982) < 1e-3 + + def test_full_loop_no_noise(self): + sample = self.full_loop() + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 172.0067) < 1e-2 + assert abs(result_mean.item() - 0.223967) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 52.5302) < 1e-2 + assert abs(result_mean.item() - 0.0684) < 1e-3 + + def test_full_loop_with_set_alpha_to_one(self): + # We specify different beta, so that the first alpha is 0.99 + sample = self.full_loop(set_alpha_to_one=True, beta_start=0.01) + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 149.8295) < 1e-2 + assert abs(result_mean.item() - 0.1951) < 1e-3 + + def test_full_loop_with_no_set_alpha_to_one(self): + # We specify different beta, so that the first alpha is 0.99 + sample = self.full_loop(set_alpha_to_one=False, beta_start=0.01) + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 149.0784) < 1e-2 + assert abs(result_mean.item() - 0.1941) < 1e-3 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = 
scheduler_class(**scheduler_config) + + num_inference_steps, eta = 10, 0.0 + t_start = 8 + + model = self.dummy_model() + sample = self.dummy_sample_deter + + scheduler.set_timesteps(num_inference_steps) + + # add noise + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for t in timesteps: + residual = model(sample, t) + sample = scheduler.step(residual, t, sample, eta).prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 354.5418) < 1e-2, f" expected result sum 354.5418, but get {result_sum}" + assert abs(result_mean.item() - 0.4616) < 1e-3, f" expected result mean 0.4616, but get {result_mean}" diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddpm.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddpm.py new file mode 100644 index 0000000000000000000000000000000000000000..056b5d83350e4baeadf24693d2597fb185d44f46 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddpm.py @@ -0,0 +1,222 @@ +import torch + +from diffusers import DDPMScheduler + +from .test_schedulers import SchedulerCommonTest + + +class DDPMSchedulerTest(SchedulerCommonTest): + scheduler_classes = (DDPMScheduler,) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "variance_type": "fixed_small", + "clip_sample": True, + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [1, 5, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.0001, 0.001, 0.01, 0.1], [0.002, 0.02, 0.2, 2]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "squaredcos_cap_v2"]: + self.check_over_configs(beta_schedule=schedule) + + def test_variance_type(self): + for variance in ["fixed_small", "fixed_large", "other"]: + self.check_over_configs(variance_type=variance) + + def test_clip_sample(self): + for clip_sample in [True, False]: + self.check_over_configs(clip_sample=clip_sample) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "sample", "v_prediction"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + ) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "sample", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_time_indices(self): + for t in [0, 500, 999]: + self.check_over_forward(time_step=t) + + def test_variance(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + assert torch.sum(torch.abs(scheduler._get_variance(0) - 0.0)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(487) - 0.00979)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(999) - 0.02)) < 1e-5 + + def test_rescale_betas_zero_snr(self): + for rescale_betas_zero_snr in [True, False]: + self.check_over_configs(rescale_betas_zero_snr=rescale_betas_zero_snr) + + def test_full_loop_no_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = 
self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_trained_timesteps = len(scheduler) + + model = self.dummy_model() + sample = self.dummy_sample_deter + generator = torch.manual_seed(0) + + for t in reversed(range(num_trained_timesteps)): + # 1. predict noise residual + residual = model(sample, t) + + # 2. predict previous mean of sample x_t-1 + pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + + # if t > 0: + # noise = self.dummy_sample_deter + # variance = scheduler.get_variance(t) ** (0.5) * noise + # + # sample = pred_prev_sample + variance + sample = pred_prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 258.9606) < 1e-2 + assert abs(result_mean.item() - 0.3372) < 1e-3 + + def test_full_loop_with_v_prediction(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(prediction_type="v_prediction") + scheduler = scheduler_class(**scheduler_config) + + num_trained_timesteps = len(scheduler) + + model = self.dummy_model() + sample = self.dummy_sample_deter + generator = torch.manual_seed(0) + + for t in reversed(range(num_trained_timesteps)): + # 1. predict noise residual + residual = model(sample, t) + + # 2. predict previous mean of sample x_t-1 + pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + + # if t > 0: + # noise = self.dummy_sample_deter + # variance = scheduler.get_variance(t) ** (0.5) * noise + # + # sample = pred_prev_sample + variance + sample = pred_prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 202.0296) < 1e-2 + assert abs(result_mean.item() - 0.2631) < 1e-3 + + def test_custom_timesteps(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [100, 87, 50, 1, 0] + + scheduler.set_timesteps(timesteps=timesteps) + + scheduler_timesteps = scheduler.timesteps + + for i, timestep in enumerate(scheduler_timesteps): + if i == len(timesteps) - 1: + expected_prev_t = -1 + else: + expected_prev_t = timesteps[i + 1] + + prev_t = scheduler.previous_timestep(timestep) + prev_t = prev_t.item() + + self.assertEqual(prev_t, expected_prev_t) + + def test_custom_timesteps_increasing_order(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [100, 87, 50, 51, 0] + + with self.assertRaises(ValueError, msg="`custom_timesteps` must be in descending order."): + scheduler.set_timesteps(timesteps=timesteps) + + def test_custom_timesteps_passing_both_num_inference_steps_and_timesteps(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [100, 87, 50, 1, 0] + num_inference_steps = len(timesteps) + + with self.assertRaises(ValueError, msg="Can only pass one of `num_inference_steps` or `custom_timesteps`."): + scheduler.set_timesteps(num_inference_steps=num_inference_steps, timesteps=timesteps) + + def test_custom_timesteps_too_large(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = 
[scheduler.config.num_train_timesteps]
+
+        with self.assertRaises(
+            ValueError,
+            msg=f"`timesteps` must start before `self.config.train_timesteps`: {scheduler.config.num_train_timesteps}",
+        ):
+            scheduler.set_timesteps(timesteps=timesteps)
+
+    def test_full_loop_with_noise(self):
+        scheduler_class = self.scheduler_classes[0]
+        scheduler_config = self.get_scheduler_config()
+        scheduler = scheduler_class(**scheduler_config)
+
+        num_trained_timesteps = len(scheduler)
+        t_start = num_trained_timesteps - 2
+
+        model = self.dummy_model()
+        sample = self.dummy_sample_deter
+        generator = torch.manual_seed(0)
+
+        # add noise
+        noise = self.dummy_noise_deter
+        timesteps = scheduler.timesteps[t_start * scheduler.order :]
+        sample = scheduler.add_noise(sample, noise, timesteps[:1])
+
+        for t in timesteps:
+            # 1. predict noise residual
+            residual = model(sample, t)
+
+            # 2. predict previous mean of sample x_t-1
+            pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample
+            sample = pred_prev_sample
+
+        result_sum = torch.sum(torch.abs(sample))
+        result_mean = torch.mean(torch.abs(sample))
+
+        assert abs(result_sum.item() - 387.9466) < 1e-2, f" expected result sum 387.9466, but got {result_sum}"
+        assert abs(result_mean.item() - 0.5051) < 1e-3, f" expected result mean 0.5051, but got {result_mean}"
diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddpm_parallel.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddpm_parallel.py
new file mode 100644
index 0000000000000000000000000000000000000000..377067071c2568d247e113bea2e3e1347a4d31ca
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ddpm_parallel.py
@@ -0,0 +1,251 @@
+# Copyright 2025 ParaDiGMS authors and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
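+#
+# DDPMParallelScheduler mirrors DDPMScheduler but additionally exposes
+# `batch_step_no_noise`, which denoises several (timestep, sample) pairs in a
+# single batched call for ParaDiGMS-style parallel sampling. A minimal sketch
+# of the batched call exercised by `test_batch_step_no_noise` below:
+#
+#     samples = torch.stack([sample1, sample2, sample3], dim=0)
+#     residual = model(samples.flatten(0, 1), timesteps.flatten(0, 1))
+#     prev = scheduler.batch_step_no_noise(residual, timesteps.flatten(0, 1), samples.flatten(0, 1))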
+ +import torch + +from diffusers import DDPMParallelScheduler + +from .test_schedulers import SchedulerCommonTest + + +class DDPMParallelSchedulerTest(SchedulerCommonTest): + scheduler_classes = (DDPMParallelScheduler,) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "variance_type": "fixed_small", + "clip_sample": True, + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [1, 5, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.0001, 0.001, 0.01, 0.1], [0.002, 0.02, 0.2, 2]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "squaredcos_cap_v2"]: + self.check_over_configs(beta_schedule=schedule) + + def test_variance_type(self): + for variance in ["fixed_small", "fixed_large", "other"]: + self.check_over_configs(variance_type=variance) + + def test_clip_sample(self): + for clip_sample in [True, False]: + self.check_over_configs(clip_sample=clip_sample) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "sample", "v_prediction"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + ) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "sample", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_time_indices(self): + for t in [0, 500, 999]: + self.check_over_forward(time_step=t) + + def test_variance(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + assert torch.sum(torch.abs(scheduler._get_variance(0) - 0.0)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(487) - 0.00979)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(999) - 0.02)) < 1e-5 + + def test_rescale_betas_zero_snr(self): + for rescale_betas_zero_snr in [True, False]: + self.check_over_configs(rescale_betas_zero_snr=rescale_betas_zero_snr) + + def test_batch_step_no_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_trained_timesteps = len(scheduler) + + model = self.dummy_model() + sample1 = self.dummy_sample_deter + sample2 = self.dummy_sample_deter + 0.1 + sample3 = self.dummy_sample_deter - 0.1 + + per_sample_batch = sample1.shape[0] + samples = torch.stack([sample1, sample2, sample3], dim=0) + timesteps = torch.arange(num_trained_timesteps)[0:3, None].repeat(1, per_sample_batch) + + residual = model(samples.flatten(0, 1), timesteps.flatten(0, 1)) + pred_prev_sample = scheduler.batch_step_no_noise(residual, timesteps.flatten(0, 1), samples.flatten(0, 1)) + + result_sum = torch.sum(torch.abs(pred_prev_sample)) + result_mean = torch.mean(torch.abs(pred_prev_sample)) + + assert abs(result_sum.item() - 1153.1833) < 1e-2 + assert abs(result_mean.item() - 0.5005) < 1e-3 + + def test_full_loop_no_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_trained_timesteps = len(scheduler) + + model = self.dummy_model() + sample = self.dummy_sample_deter 
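+        # denoise across the full training schedule in reverse (t = 999 ... 0),
+        # feeding each predicted x_{t-1} back in as the next sample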
+        generator = torch.manual_seed(0)
+
+        for t in reversed(range(num_trained_timesteps)):
+            # 1. predict noise residual
+            residual = model(sample, t)
+
+            # 2. predict previous mean of sample x_t-1
+            pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample
+
+            sample = pred_prev_sample
+
+        result_sum = torch.sum(torch.abs(sample))
+        result_mean = torch.mean(torch.abs(sample))
+
+        assert abs(result_sum.item() - 258.9606) < 1e-2
+        assert abs(result_mean.item() - 0.3372) < 1e-3
+
+    def test_full_loop_with_v_prediction(self):
+        scheduler_class = self.scheduler_classes[0]
+        scheduler_config = self.get_scheduler_config(prediction_type="v_prediction")
+        scheduler = scheduler_class(**scheduler_config)
+
+        num_trained_timesteps = len(scheduler)
+
+        model = self.dummy_model()
+        sample = self.dummy_sample_deter
+        generator = torch.manual_seed(0)
+
+        for t in reversed(range(num_trained_timesteps)):
+            # 1. predict noise residual
+            residual = model(sample, t)
+
+            # 2. predict previous mean of sample x_t-1
+            pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample
+
+            sample = pred_prev_sample
+
+        result_sum = torch.sum(torch.abs(sample))
+        result_mean = torch.mean(torch.abs(sample))
+
+        assert abs(result_sum.item() - 202.0296) < 1e-2
+        assert abs(result_mean.item() - 0.2631) < 1e-3
+
+    def test_custom_timesteps(self):
+        scheduler_class = self.scheduler_classes[0]
+        scheduler_config = self.get_scheduler_config()
+        scheduler = scheduler_class(**scheduler_config)
+
+        timesteps = [100, 87, 50, 1, 0]
+
+        scheduler.set_timesteps(timesteps=timesteps)
+
+        scheduler_timesteps = scheduler.timesteps
+
+        for i, timestep in enumerate(scheduler_timesteps):
+            if i == len(timesteps) - 1:
+                expected_prev_t = -1
+            else:
+                expected_prev_t = timesteps[i + 1]
+
+            prev_t = scheduler.previous_timestep(timestep)
+            prev_t = prev_t.item()
+
+            self.assertEqual(prev_t, expected_prev_t)
+
+    def test_custom_timesteps_increasing_order(self):
+        scheduler_class = self.scheduler_classes[0]
+        scheduler_config = self.get_scheduler_config()
+        scheduler = scheduler_class(**scheduler_config)
+
+        timesteps = [100, 87, 50, 51, 0]
+
+        with self.assertRaises(ValueError, msg="`custom_timesteps` must be in descending order."):
+            scheduler.set_timesteps(timesteps=timesteps)
+
+    def test_custom_timesteps_passing_both_num_inference_steps_and_timesteps(self):
+        scheduler_class = self.scheduler_classes[0]
+        scheduler_config = self.get_scheduler_config()
+        scheduler = scheduler_class(**scheduler_config)
+
+        timesteps = [100, 87, 50, 1, 0]
+        num_inference_steps = len(timesteps)
+
+        with self.assertRaises(ValueError, msg="Can only pass one of `num_inference_steps` or `custom_timesteps`."):
+            scheduler.set_timesteps(num_inference_steps=num_inference_steps, timesteps=timesteps)
+
+    def test_custom_timesteps_too_large(self):
+        scheduler_class = self.scheduler_classes[0]
+        scheduler_config = self.get_scheduler_config()
+        scheduler = scheduler_class(**scheduler_config)
+
+        timesteps = [scheduler.config.num_train_timesteps]
+
+        with self.assertRaises(
+            ValueError,
+            msg=f"`timesteps` must start before `self.config.train_timesteps`: {scheduler.config.num_train_timesteps}",
+        ):
+            scheduler.set_timesteps(timesteps=timesteps)
+
+    def test_full_loop_with_noise(self):
+        scheduler_class = self.scheduler_classes[0]
+        scheduler_config = self.get_scheduler_config()
+        scheduler = scheduler_class(**scheduler_config)
+
+        num_trained_timesteps = len(scheduler)
+        t_start = num_trained_timesteps - 2
+
+        model = 
self.dummy_model() + sample = self.dummy_sample_deter + generator = torch.manual_seed(0) + + # add noise + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for t in timesteps: + # 1. predict noise residual + residual = model(sample, t) + + # 2. predict previous mean of sample x_t-1 + pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + sample = pred_prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 387.9466) < 1e-2, f" expected result sum 387.9466, but get {result_sum}" + assert abs(result_mean.item() - 0.5051) < 1e-3, f" expected result mean 0.5051, but get {result_mean}" diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_deis.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_deis.py new file mode 100644 index 0000000000000000000000000000000000000000..048bde51c36618221845dd48a642882208b77458 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_deis.py @@ -0,0 +1,273 @@ +import tempfile +import unittest + +import torch + +from diffusers import ( + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + DPMSolverSinglestepScheduler, + UniPCMultistepScheduler, +) + +from .test_schedulers import SchedulerCommonTest + + +class DEISMultistepSchedulerTest(SchedulerCommonTest): + scheduler_classes = (DEISMultistepScheduler,) + forward_default_kwargs = (("num_inference_steps", 25),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "solver_order": 2, + } + + config.update(**kwargs) + return config + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + new_scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + output, new_output = sample, sample + for t in range(time_step, time_step + scheduler.config.solver_order + 1): + t = scheduler.timesteps[t] + output = scheduler.step(residual, t, output, **kwargs).prev_sample + new_output = new_scheduler.step(residual, t, new_output, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + @unittest.skip("Test not supported.") + def test_from_save_pretrained(self): + pass + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, 
residual + 0.10]
+
+        for scheduler_class in self.scheduler_classes:
+            scheduler_config = self.get_scheduler_config()
+            scheduler = scheduler_class(**scheduler_config)
+            scheduler.set_timesteps(num_inference_steps)
+
+            # copy over dummy past residuals (must be after setting timesteps)
+            scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order]
+
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                scheduler.save_config(tmpdirname)
+                new_scheduler = scheduler_class.from_pretrained(tmpdirname)
+                # copy over dummy past residuals
+                new_scheduler.set_timesteps(num_inference_steps)
+
+                # copy over dummy past residual (must be after setting timesteps)
+                new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order]
+
+            output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample
+            new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample
+
+            assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical"
+
+    def full_loop(self, scheduler=None, **config):
+        if scheduler is None:
+            scheduler_class = self.scheduler_classes[0]
+            scheduler_config = self.get_scheduler_config(**config)
+            scheduler = scheduler_class(**scheduler_config)
+
+        num_inference_steps = 10
+        model = self.dummy_model()
+        sample = self.dummy_sample_deter
+        scheduler.set_timesteps(num_inference_steps)
+
+        for i, t in enumerate(scheduler.timesteps):
+            residual = model(sample, t)
+            sample = scheduler.step(residual, t, sample).prev_sample
+
+        return sample
+
+    def test_step_shape(self):
+        kwargs = dict(self.forward_default_kwargs)
+
+        num_inference_steps = kwargs.pop("num_inference_steps", None)
+
+        for scheduler_class in self.scheduler_classes:
+            scheduler_config = self.get_scheduler_config()
+            scheduler = scheduler_class(**scheduler_config)
+
+            sample = self.dummy_sample
+            residual = 0.1 * sample
+
+            if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"):
+                scheduler.set_timesteps(num_inference_steps)
+            elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"):
+                kwargs["num_inference_steps"] = num_inference_steps
+
+            # copy over dummy past residuals (must be done after set_timesteps)
+            dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10]
+            scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order]
+
+            time_step_0 = scheduler.timesteps[5]
+            time_step_1 = scheduler.timesteps[6]
+
+            output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample
+            output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample
+
+            self.assertEqual(output_0.shape, sample.shape)
+            self.assertEqual(output_0.shape, output_1.shape)
+
+    def test_switch(self):
+        # make sure that iterating over schedulers with the same config names gives the same results
+        # for defaults
+        scheduler = DEISMultistepScheduler(**self.get_scheduler_config())
+        sample = self.full_loop(scheduler=scheduler)
+        result_mean = torch.mean(torch.abs(sample))
+
+        assert abs(result_mean.item() - 0.23916) < 1e-3
+
+        scheduler = DPMSolverSinglestepScheduler.from_config(scheduler.config)
+        scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config)
+        scheduler = UniPCMultistepScheduler.from_config(scheduler.config)
+        scheduler = DEISMultistepScheduler.from_config(scheduler.config)
+
+        sample = 
self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.23916) < 1e-3 + + def test_timesteps(self): + for timesteps in [25, 50, 100, 999, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for order in [1, 2, 3]: + for solver_type in ["logrho"]: + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "sample"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + algorithm_type="deis", + solver_order=order, + solver_type=solver_type, + ) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_solver_order_and_type(self): + for algorithm_type in ["deis"]: + for solver_type in ["logrho"]: + for order in [1, 2, 3]: + for prediction_type in ["epsilon", "sample"]: + self.check_over_configs( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + sample = self.full_loop( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + assert not torch.isnan(sample).any(), "Samples have nan numbers" + + def test_lower_order_final(self): + self.check_over_configs(lower_order_final=True) + self.check_over_configs(lower_order_final=False) + + def test_inference_steps(self): + for num_inference_steps in [1, 2, 3, 5, 10, 50, 100, 999, 1000]: + self.check_over_forward(num_inference_steps=num_inference_steps, time_step=0) + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.23916) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.091) < 1e-3 + + def test_fp16_support(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(thresholding=True, dynamic_thresholding_ratio=0) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter.half() + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + assert sample.dtype == torch.float16 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + t_start = 8 + + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + # add noise + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 315.3016) < 1e-2, f" expected result sum 315.3016, but get {result_sum}" + assert abs(result_mean.item() - 0.41054) < 1e-3, f" expected result mean 0.41054, but get 
{result_mean}" + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_multi.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_multi.py new file mode 100644 index 0000000000000000000000000000000000000000..28c354709dc914f18347e7089ec7cb66c99f3174 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_multi.py @@ -0,0 +1,368 @@ +import tempfile +import unittest + +import torch + +from diffusers import ( + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + DPMSolverSinglestepScheduler, + UniPCMultistepScheduler, +) + +from .test_schedulers import SchedulerCommonTest + + +class DPMSolverMultistepSchedulerTest(SchedulerCommonTest): + scheduler_classes = (DPMSolverMultistepScheduler,) + forward_default_kwargs = (("num_inference_steps", 25),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "solver_order": 2, + "prediction_type": "epsilon", + "thresholding": False, + "sample_max_value": 1.0, + "algorithm_type": "dpmsolver++", + "solver_type": "midpoint", + "lower_order_final": False, + "euler_at_final": False, + "lambda_min_clipped": -float("inf"), + "variance_type": None, + "final_sigmas_type": "sigma_min", + } + + config.update(**kwargs) + return config + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + new_scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + output, new_output = sample, sample + for t in range(time_step, time_step + scheduler.config.solver_order + 1): + t = new_scheduler.timesteps[t] + output = scheduler.step(residual, t, output, **kwargs).prev_sample + new_output = new_scheduler.step(residual, t, new_output, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + @unittest.skip("Test not supported.") + def test_from_save_pretrained(self): + pass + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residuals (must be after setting timesteps) + 
scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + # copy over dummy past residuals + new_scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residual (must be after setting timesteps) + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + time_step = new_scheduler.timesteps[time_step] + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def full_loop(self, scheduler=None, **config): + if scheduler is None: + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + generator = torch.manual_seed(0) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + + return sample + + def full_loop_custom_timesteps(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + scheduler.set_timesteps(num_inference_steps) + timesteps = scheduler.timesteps + # reset the timesteps using `timesteps` + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps=None, timesteps=timesteps) + + generator = torch.manual_seed(0) + model = self.dummy_model() + sample = self.dummy_sample_deter + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + + return sample + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # copy over dummy past residuals (must be done after set_timesteps) + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + time_step_0 = scheduler.timesteps[5] + time_step_1 = scheduler.timesteps[6] + + output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_timesteps(self): + for timesteps in [25, 50, 100, 999, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for order in [1, 2, 3]: 
+ for solver_type in ["midpoint", "heun"]: + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "sample"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + algorithm_type="dpmsolver++", + solver_order=order, + solver_type=solver_type, + ) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_solver_order_and_type(self): + for algorithm_type in ["dpmsolver", "dpmsolver++", "sde-dpmsolver", "sde-dpmsolver++"]: + for solver_type in ["midpoint", "heun"]: + for order in [1, 2, 3]: + for prediction_type in ["epsilon", "sample"]: + if algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"]: + if order == 3: + continue + else: + self.check_over_configs( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + sample = self.full_loop( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + assert not torch.isnan(sample).any(), "Samples have nan numbers" + + def test_lower_order_final(self): + self.check_over_configs(lower_order_final=True) + self.check_over_configs(lower_order_final=False) + + def test_euler_at_final(self): + self.check_over_configs(euler_at_final=True) + self.check_over_configs(euler_at_final=False) + + def test_lambda_min_clipped(self): + self.check_over_configs(lambda_min_clipped=-float("inf")) + self.check_over_configs(lambda_min_clipped=-5.1) + + def test_variance_type(self): + self.check_over_configs(variance_type=None) + self.check_over_configs(variance_type="learned_range") + + def test_inference_steps(self): + for num_inference_steps in [1, 2, 3, 5, 10, 50, 100, 999, 1000]: + self.check_over_forward(num_inference_steps=num_inference_steps, time_step=0) + + def test_rescale_betas_zero_snr(self): + for rescale_betas_zero_snr in [True, False]: + self.check_over_configs(rescale_betas_zero_snr=rescale_betas_zero_snr) + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.3301) < 1e-3 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + t_start = 5 + + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + # add noise + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 318.4111) < 1e-2, f" expected result sum 318.4111, but get {result_sum}" + assert abs(result_mean.item() - 0.4146) < 1e-3, f" expected result mean 0.4146, but get {result_mean}" + + def test_full_loop_no_noise_thres(self): + sample = self.full_loop(thresholding=True, dynamic_thresholding_ratio=0.87, sample_max_value=0.5) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 1.1364) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + 
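+        # "v_prediction" repeats the 10-step loop with the velocity
+        # parameterization of the model output, hence a different reference mean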
result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2251) < 1e-3 + + def test_full_loop_with_karras_and_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction", use_karras_sigmas=True) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2096) < 1e-3 + + def test_full_loop_with_lu_and_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction", use_lu_lambdas=True) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.1554) < 1e-3 + + def test_switch(self): + # make sure that iterating over schedulers with same config names gives same results + # for defaults + scheduler = DPMSolverMultistepScheduler(**self.get_scheduler_config()) + sample = self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.3301) < 1e-3 + + scheduler = DPMSolverSinglestepScheduler.from_config(scheduler.config) + scheduler = UniPCMultistepScheduler.from_config(scheduler.config) + scheduler = DEISMultistepScheduler.from_config(scheduler.config) + scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config) + + sample = self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.3301) < 1e-3 + + def test_fp16_support(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(thresholding=True, dynamic_thresholding_ratio=0) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter.half() + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + assert sample.dtype == torch.float16 + + def test_duplicated_timesteps(self): + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(scheduler.config.num_train_timesteps) + assert len(scheduler.timesteps) == scheduler.num_inference_steps + + def test_custom_timesteps(self): + for algorithm_type in ["dpmsolver++", "sde-dpmsolver++"]: + for prediction_type in ["epsilon", "sample", "v_prediction"]: + for final_sigmas_type in ["sigma_min", "zero"]: + sample = self.full_loop( + algorithm_type=algorithm_type, + prediction_type=prediction_type, + final_sigmas_type=final_sigmas_type, + ) + sample_custom_timesteps = self.full_loop_custom_timesteps( + algorithm_type=algorithm_type, + prediction_type=prediction_type, + final_sigmas_type=final_sigmas_type, + ) + assert torch.sum(torch.abs(sample - sample_custom_timesteps)) < 1e-5, ( + f"Scheduler outputs are not identical for algorithm_type: {algorithm_type}, prediction_type: {prediction_type} and final_sigmas_type: {final_sigmas_type}" + ) + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_multi_inverse.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_multi_inverse.py new file mode 100644 index 0000000000000000000000000000000000000000..0eced957190ca46c14877827bb410cc7b84ed6ae --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_multi_inverse.py @@ -0,0 +1,273 @@ 
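+# Note: DPMSolverMultistepInverseScheduler is the inverted counterpart of
+# DPMSolverMultistepScheduler. It steps along the noising direction (as used in
+# inversion workflows such as DiffEdit-style editing), which is why the
+# reference statistics in this suite differ markedly from the forward solver's.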
+import tempfile
+
+import torch
+
+from diffusers import DPMSolverMultistepInverseScheduler, DPMSolverMultistepScheduler
+
+from .test_schedulers import SchedulerCommonTest
+
+
+class DPMSolverMultistepInverseSchedulerTest(SchedulerCommonTest):
+    scheduler_classes = (DPMSolverMultistepInverseScheduler,)
+    forward_default_kwargs = (("num_inference_steps", 25),)
+
+    def get_scheduler_config(self, **kwargs):
+        config = {
+            "num_train_timesteps": 1000,
+            "beta_start": 0.0001,
+            "beta_end": 0.02,
+            "beta_schedule": "linear",
+            "solver_order": 2,
+            "prediction_type": "epsilon",
+            "thresholding": False,
+            "sample_max_value": 1.0,
+            "algorithm_type": "dpmsolver++",
+            "solver_type": "midpoint",
+            "lower_order_final": False,
+            "lambda_min_clipped": -float("inf"),
+            "variance_type": None,
+        }
+
+        config.update(**kwargs)
+        return config
+
+    def check_over_configs(self, time_step=0, **config):
+        kwargs = dict(self.forward_default_kwargs)
+        num_inference_steps = kwargs.pop("num_inference_steps", None)
+        sample = self.dummy_sample
+        residual = 0.1 * sample
+        dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10]
+
+        for scheduler_class in self.scheduler_classes:
+            scheduler_config = self.get_scheduler_config(**config)
+            scheduler = scheduler_class(**scheduler_config)
+            scheduler.set_timesteps(num_inference_steps)
+            # copy over dummy past residuals
+            scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order]
+
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                scheduler.save_config(tmpdirname)
+                new_scheduler = scheduler_class.from_pretrained(tmpdirname)
+                new_scheduler.set_timesteps(num_inference_steps)
+                # copy over dummy past residuals
+                new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order]
+
+            output, new_output = sample, sample
+            for t in range(time_step, time_step + scheduler.config.solver_order + 1):
+                t = scheduler.timesteps[t]
+                output = scheduler.step(residual, t, output, **kwargs).prev_sample
+                new_output = new_scheduler.step(residual, t, new_output, **kwargs).prev_sample
+
+            assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical"
+
+    def test_from_save_pretrained(self):
+        pass
+
+    def check_over_forward(self, time_step=0, **forward_kwargs):
+        kwargs = dict(self.forward_default_kwargs)
+        num_inference_steps = kwargs.pop("num_inference_steps", None)
+        sample = self.dummy_sample
+        residual = 0.1 * sample
+        dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10]
+
+        for scheduler_class in self.scheduler_classes:
+            scheduler_config = self.get_scheduler_config()
+            scheduler = scheduler_class(**scheduler_config)
+            scheduler.set_timesteps(num_inference_steps)
+
+            # copy over dummy past residuals (must be after setting timesteps)
+            scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order]
+
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                scheduler.save_config(tmpdirname)
+                new_scheduler = scheduler_class.from_pretrained(tmpdirname)
+                # copy over dummy past residuals
+                new_scheduler.set_timesteps(num_inference_steps)
+
+                # copy over dummy past residual (must be after setting timesteps)
+                new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order]
+
+            output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample
+            new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample
+
+            assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical"
+
+    def 
full_loop(self, scheduler=None, **config): + if scheduler is None: + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + return sample + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # copy over dummy past residuals (must be done after set_timesteps) + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + time_step_0 = scheduler.timesteps[5] + time_step_1 = scheduler.timesteps[6] + + output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_timesteps(self): + for timesteps in [25, 50, 100, 999, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for order in [1, 2, 3]: + for solver_type in ["midpoint", "heun"]: + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "sample"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + algorithm_type="dpmsolver++", + solver_order=order, + solver_type=solver_type, + ) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_solver_order_and_type(self): + for algorithm_type in ["dpmsolver", "dpmsolver++"]: + for solver_type in ["midpoint", "heun"]: + for order in [1, 2, 3]: + for prediction_type in ["epsilon", "sample"]: + self.check_over_configs( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + sample = self.full_loop( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + assert not torch.isnan(sample).any(), "Samples have nan numbers" + + def test_lower_order_final(self): + self.check_over_configs(lower_order_final=True) + self.check_over_configs(lower_order_final=False) + + def test_lambda_min_clipped(self): + self.check_over_configs(lambda_min_clipped=-float("inf")) + self.check_over_configs(lambda_min_clipped=-5.1) + + def test_variance_type(self): + self.check_over_configs(variance_type=None) + self.check_over_configs(variance_type="learned_range") + + def test_timestep_spacing(self): + for timestep_spacing in ["trailing", "leading"]: + 
self.check_over_configs(timestep_spacing=timestep_spacing) + + def test_inference_steps(self): + for num_inference_steps in [1, 2, 3, 5, 10, 50, 100, 999, 1000]: + self.check_over_forward(num_inference_steps=num_inference_steps, time_step=0) + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.7047) < 1e-3 + + def test_full_loop_no_noise_thres(self): + sample = self.full_loop(thresholding=True, dynamic_thresholding_ratio=0.87, sample_max_value=0.5) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 19.8933) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 1.5194) < 1e-3 + + def test_full_loop_with_karras_and_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction", use_karras_sigmas=True) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 1.7833) < 2e-3 + + def test_switch(self): + # make sure that iterating over schedulers with same config names gives same results + # for defaults + scheduler = DPMSolverMultistepInverseScheduler(**self.get_scheduler_config()) + sample = self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.7047) < 1e-3 + + scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config) + scheduler = DPMSolverMultistepInverseScheduler.from_config(scheduler.config) + + sample = self.full_loop(scheduler=scheduler) + new_result_mean = torch.mean(torch.abs(sample)) + + assert abs(new_result_mean.item() - result_mean.item()) < 1e-3 + + def test_fp16_support(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(thresholding=True, dynamic_thresholding_ratio=0) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter.half() + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + assert sample.dtype == torch.float16 + + def test_unique_timesteps(self, **config): + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(scheduler.config.num_train_timesteps) + assert len(scheduler.timesteps.unique()) == scheduler.num_inference_steps + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_sde.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_sde.py new file mode 100644 index 0000000000000000000000000000000000000000..e4dde67344ac7a317b4110ea511b590c88442bd7 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_sde.py @@ -0,0 +1,173 @@ +import torch + +from diffusers import DPMSolverSDEScheduler + +from ..testing_utils import require_torchsde, torch_device +from .test_schedulers import SchedulerCommonTest + + +@require_torchsde +class DPMSolverSDESchedulerTest(SchedulerCommonTest): + scheduler_classes = (DPMSolverSDEScheduler,) + num_inference_steps = 10 + + def 
get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1100, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "noise_sampler_seed": 0, + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.00001, 0.0001, 0.001], [0.0002, 0.002, 0.02]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_full_loop_no_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["mps"]: + assert abs(result_sum.item() - 167.47821044921875) < 1e-2 + assert abs(result_mean.item() - 0.2178705964565277) < 1e-3 + elif torch_device in ["cuda", "xpu"]: + assert abs(result_sum.item() - 171.59352111816406) < 1e-2 + assert abs(result_mean.item() - 0.22342906892299652) < 1e-3 + else: + assert abs(result_sum.item() - 162.52383422851562) < 1e-2 + assert abs(result_mean.item() - 0.211619570851326) < 1e-3 + + def test_full_loop_with_v_prediction(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(prediction_type="v_prediction") + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["mps"]: + assert abs(result_sum.item() - 124.77149200439453) < 1e-2 + assert abs(result_mean.item() - 0.16226289014816284) < 1e-3 + elif torch_device in ["cuda", "xpu"]: + assert abs(result_sum.item() - 128.1663360595703) < 1e-2 + assert abs(result_mean.item() - 0.16688326001167297) < 1e-3 + else: + assert abs(result_sum.item() - 119.8487548828125) < 1e-2 + assert abs(result_mean.item() - 0.1560530662536621) < 1e-3 + + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter.to(torch_device) * scheduler.init_noise_sigma + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + 
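+            # scale_model_input must precede every model call for this
+            # sigma-based scheduler; skipping it would change the reference
+            # sums checked below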
model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["mps"]: + assert abs(result_sum.item() - 167.46957397460938) < 1e-2 + assert abs(result_mean.item() - 0.21805934607982635) < 1e-3 + elif torch_device in ["cuda", "xpu"]: + assert abs(result_sum.item() - 171.59353637695312) < 1e-2 + assert abs(result_mean.item() - 0.22342908382415771) < 1e-3 + else: + assert abs(result_sum.item() - 162.52383422851562) < 1e-2 + assert abs(result_mean.item() - 0.211619570851326) < 1e-3 + + def test_full_loop_device_karras_sigmas(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config, use_karras_sigmas=True) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter.to(torch_device) * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["mps"]: + assert abs(result_sum.item() - 176.66974135742188) < 1e-2 + assert abs(result_mean.item() - 0.23003872730981811) < 1e-2 + elif torch_device in ["cuda", "xpu"]: + assert abs(result_sum.item() - 177.63653564453125) < 1e-2 + assert abs(result_mean.item() - 0.23003872730981811) < 1e-2 + else: + assert abs(result_sum.item() - 170.3135223388672) < 1e-2 + assert abs(result_mean.item() - 0.23003872730981811) < 1e-2 + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_single.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_single.py new file mode 100644 index 0000000000000000000000000000000000000000..0756a5ed71ff97d0b25363643b77404a6785d936 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_dpm_single.py @@ -0,0 +1,356 @@ +import tempfile +import unittest + +import torch + +from diffusers import ( + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + DPMSolverSinglestepScheduler, + UniPCMultistepScheduler, +) + +from .test_schedulers import SchedulerCommonTest + + +class DPMSolverSinglestepSchedulerTest(SchedulerCommonTest): + scheduler_classes = (DPMSolverSinglestepScheduler,) + forward_default_kwargs = (("num_inference_steps", 25),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "solver_order": 2, + "prediction_type": "epsilon", + "thresholding": False, + "sample_max_value": 1.0, + "algorithm_type": "dpmsolver++", + "solver_type": "midpoint", + "lambda_min_clipped": -float("inf"), + "variance_type": None, + "final_sigmas_type": "sigma_min", + } + + config.update(**kwargs) + return config + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = 
[residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + new_scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + output, new_output = sample, sample + for t in range(time_step, time_step + scheduler.config.solver_order + 1): + t = scheduler.timesteps[t] + output = scheduler.step(residual, t, output, **kwargs).prev_sample + new_output = new_scheduler.step(residual, t, new_output, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + @unittest.skip("Test not supported.") + def test_from_save_pretrained(self): + pass + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residuals (must be after setting timesteps) + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + # copy over dummy past residuals + new_scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residuals (must be after setting timesteps) + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def full_loop(self, scheduler=None, **config): + if scheduler is None: + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + return sample + + def full_loop_custom_timesteps(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + scheduler.set_timesteps(num_inference_steps) + timesteps = scheduler.timesteps + # reset the timesteps using `timesteps` + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps=None, timesteps=timesteps) + + model = self.dummy_model() + sample = 
self.dummy_sample_deter + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + return sample + + def test_full_uneven_loop(self): + scheduler = DPMSolverSinglestepScheduler(**self.get_scheduler_config()) + num_inference_steps = 50 + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + # make sure that the first t is uneven + for i, t in enumerate(scheduler.timesteps[3:]): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2574) < 1e-3 + + def test_timesteps(self): + for timesteps in [25, 50, 100, 999, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_switch(self): + # make sure that iterating over schedulers with same config names gives same results + # for defaults + scheduler = DPMSolverSinglestepScheduler(**self.get_scheduler_config()) + sample = self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2791) < 1e-3 + + scheduler = DEISMultistepScheduler.from_config(scheduler.config) + scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config) + scheduler = UniPCMultistepScheduler.from_config(scheduler.config) + scheduler = DPMSolverSinglestepScheduler.from_config(scheduler.config) + + sample = self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2791) < 1e-3 + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for order in [1, 2, 3]: + for solver_type in ["midpoint", "heun"]: + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "sample"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + algorithm_type="dpmsolver++", + solver_order=order, + solver_type=solver_type, + ) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_solver_order_and_type(self): + for algorithm_type in ["dpmsolver", "dpmsolver++", "sde-dpmsolver++"]: + for solver_type in ["midpoint", "heun"]: + for order in [1, 2, 3]: + for prediction_type in ["epsilon", "sample"]: + if algorithm_type == "sde-dpmsolver++": + if order == 3: + continue + else: + self.check_over_configs( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + sample = self.full_loop( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + assert not torch.isnan(sample).any(), "Samples have nan numbers" + + def test_lower_order_final(self): + self.check_over_configs(lower_order_final=True) + self.check_over_configs(lower_order_final=False) + + def test_lambda_min_clipped(self): + self.check_over_configs(lambda_min_clipped=-float("inf")) + self.check_over_configs(lambda_min_clipped=-5.1) + + def test_variance_type(self): + self.check_over_configs(variance_type=None) + self.check_over_configs(variance_type="learned_range") + + def test_inference_steps(self): + for num_inference_steps in [1, 2, 3, 5, 10, 50, 100, 999, 1000]: + self.check_over_forward(num_inference_steps=num_inference_steps, time_step=0) + + def test_full_loop_no_noise(self): + sample = self.full_loop() + 
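+ # full_loop runs 10 denoising steps over the deterministic dummy sample, so the mean checked below is a fixed regression value for that setup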
result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2791) < 1e-3 + + def test_full_loop_with_karras(self): + sample = self.full_loop(use_karras_sigmas=True) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2248) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.1453) < 1e-3 + + def test_full_loop_with_karras_and_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction", use_karras_sigmas=True) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.0649) < 1e-3 + + def test_fp16_support(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(thresholding=True, dynamic_thresholding_ratio=0) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter.half() + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + assert sample.dtype == torch.float16 + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # copy over dummy past residuals (must be done after set_timesteps) + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + time_step_0 = scheduler.timesteps[0] + time_step_1 = scheduler.timesteps[1] + + output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + t_start = 5 + + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + # add noise + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 269.2187) < 1e-2, f" expected result sum 269.2187, but got {result_sum}" + assert abs(result_mean.item() - 0.3505) < 1e-3, f" expected result mean 0.3505, but got {result_mean}" + + def test_custom_timesteps(self): + for prediction_type in ["epsilon", "sample", "v_prediction"]: + for lower_order_final in [True, False]:
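+ # feeding the recorded schedule back in via timesteps= must reproduce the implicitly computed schedule exactly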
+ for final_sigmas_type in ["sigma_min", "zero"]: + sample = self.full_loop( + prediction_type=prediction_type, + lower_order_final=lower_order_final, + final_sigmas_type=final_sigmas_type, + ) + sample_custom_timesteps = self.full_loop_custom_timesteps( + prediction_type=prediction_type, + lower_order_final=lower_order_final, + final_sigmas_type=final_sigmas_type, + ) + assert torch.sum(torch.abs(sample - sample_custom_timesteps)) < 1e-5, ( + f"Scheduler outputs are not identical for prediction_type: {prediction_type}, lower_order_final: {lower_order_final} and final_sigmas_type: {final_sigmas_type}" + ) + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_edm_dpmsolver_multistep.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_edm_dpmsolver_multistep.py new file mode 100644 index 0000000000000000000000000000000000000000..8525ce61c40d2670d03d7e04b075fb8236a505cc --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_edm_dpmsolver_multistep.py @@ -0,0 +1,262 @@ +import tempfile +import unittest + +import torch + +from diffusers import EDMDPMSolverMultistepScheduler + +from .test_schedulers import SchedulerCommonTest + + +class EDMDPMSolverMultistepSchedulerTest(SchedulerCommonTest): + scheduler_classes = (EDMDPMSolverMultistepScheduler,) + forward_default_kwargs = (("num_inference_steps", 25),) + + def get_scheduler_config(self, **kwargs): + config = { + "sigma_min": 0.002, + "sigma_max": 80.0, + "sigma_data": 0.5, + "num_train_timesteps": 1000, + "solver_order": 2, + "prediction_type": "epsilon", + "thresholding": False, + "sample_max_value": 1.0, + "algorithm_type": "dpmsolver++", + "solver_type": "midpoint", + "lower_order_final": False, + "euler_at_final": False, + "final_sigmas_type": "sigma_min", + } + + config.update(**kwargs) + return config + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + new_scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + output, new_output = sample, sample + for t in range(time_step, time_step + scheduler.config.solver_order + 1): + t = new_scheduler.timesteps[t] + output = scheduler.step(residual, t, output, **kwargs).prev_sample + new_output = new_scheduler.step(residual, t, new_output, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + @unittest.skip("Test not supported.") + def test_from_save_pretrained(self): + pass + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = 
dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residuals (must be after setting timesteps) + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + # copy over dummy past residuals + new_scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residual (must be after setting timesteps) + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + time_step = new_scheduler.timesteps[time_step] + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def full_loop(self, scheduler=None, **config): + if scheduler is None: + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + return sample + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # copy over dummy past residuals (must be done after set_timesteps) + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + time_step_0 = scheduler.timesteps[5] + time_step_1 = scheduler.timesteps[6] + + output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_timesteps(self): + for timesteps in [25, 50, 100, 999, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for order in [1, 2, 3]: + for solver_type in ["midpoint", "heun"]: + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + algorithm_type="dpmsolver++", + solver_order=order, + 
solver_type=solver_type, + ) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + # TODO (patil-suraj): Fix this test + @unittest.skip("Skip for now, as it is failing currently but works with the actual model") + def test_solver_order_and_type(self): + for algorithm_type in ["dpmsolver++", "sde-dpmsolver++"]: + for solver_type in ["midpoint", "heun"]: + for order in [1, 2, 3]: + for prediction_type in ["epsilon", "v_prediction"]: + if algorithm_type == "sde-dpmsolver++": + if order == 3: + continue + else: + self.check_over_configs( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + sample = self.full_loop( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + algorithm_type=algorithm_type, + ) + assert not torch.isnan(sample).any(), ( + f"Samples have nan numbers, {order}, {solver_type}, {prediction_type}, {algorithm_type}" + ) + + def test_lower_order_final(self): + self.check_over_configs(lower_order_final=True) + self.check_over_configs(lower_order_final=False) + + def test_euler_at_final(self): + self.check_over_configs(euler_at_final=True) + self.check_over_configs(euler_at_final=False) + + def test_inference_steps(self): + for num_inference_steps in [1, 2, 3, 5, 10, 50, 100, 999, 1000]: + self.check_over_forward(num_inference_steps=num_inference_steps, time_step=0) + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.0001) < 1e-3 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + t_start = 5 + + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + # add noise + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 8.1661) < 1e-2, f" expected result sum 8.1661, but got {result_sum}" + assert abs(result_mean.item() - 0.0106) < 1e-3, f" expected result mean 0.0106, but got {result_mean}" + + def test_full_loop_no_noise_thres(self): + sample = self.full_loop(thresholding=True, dynamic_thresholding_ratio=0.87, sample_max_value=0.5) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.0080) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.0092) < 1e-3 + + def test_duplicated_timesteps(self, **config): + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(scheduler.config.num_train_timesteps) + assert len(scheduler.timesteps) == scheduler.num_inference_steps + + @unittest.skip("Test not supported.") + def test_trained_betas(self): + pass diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_edm_euler.py 
b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_edm_euler.py new file mode 100644 index 0000000000000000000000000000000000000000..acac4b1f4caed2aff6741a40e9214adedc0f744f --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_edm_euler.py @@ -0,0 +1,203 @@ +import inspect +import tempfile +import unittest +from typing import Dict, List, Tuple + +import torch + +from diffusers import EDMEulerScheduler + +from .test_schedulers import SchedulerCommonTest + + +class EDMEulerSchedulerTest(SchedulerCommonTest): + scheduler_classes = (EDMEulerScheduler,) + forward_default_kwargs = (("num_inference_steps", 10),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 256, + "sigma_min": 0.002, + "sigma_max": 80.0, + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_full_loop_no_noise(self, num_inference_steps=10, seed=0): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + for i, t in enumerate(scheduler.timesteps): + scaled_sample = scheduler.scale_model_input(sample, t) + + model_output = model(scaled_sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 34.1855) < 1e-3 + assert abs(result_mean.item() - 0.044) < 1e-3 + + def test_full_loop_device(self, num_inference_steps=10, seed=0): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + for i, t in enumerate(scheduler.timesteps): + scaled_sample = scheduler.scale_model_input(sample, t) + + model_output = model(scaled_sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 34.1855) < 1e-3 + assert abs(result_mean.item() - 0.044) < 1e-3 + + # Override test_from_save_pretrained to use EDMEulerScheduler-specific logic + def test_from_save_pretrained(self): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + + scheduler.set_timesteps(num_inference_steps) + new_scheduler.set_timesteps(num_inference_steps) + timestep = scheduler.timesteps[0] + + sample = self.dummy_sample + + scaled_sample = scheduler.scale_model_input(sample, timestep) + residual = 0.1 * scaled_sample + + new_scaled_sample = new_scheduler.scale_model_input(sample, timestep) + 
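+ # the scheduler reloaded from disk must scale the input identically; the two residuals below only diverge if the config round-trip changed behavior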
new_residual = 0.1 * new_scaled_sample + + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + output = scheduler.step(residual, timestep, sample, **kwargs).prev_sample + + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + new_output = new_scheduler.step(new_residual, timestep, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + # Override test_step_shape to use EDMEulerScheduler-specific logic + def test_step_shape(self): + num_inference_steps = 10 + + scheduler_config = self.get_scheduler_config() + scheduler = self.scheduler_classes[0](**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + + timestep_0 = scheduler.timesteps[0] + timestep_1 = scheduler.timesteps[1] + + sample = self.dummy_sample + scaled_sample = scheduler.scale_model_input(sample, timestep_0) + residual = 0.1 * scaled_sample + + output_0 = scheduler.step(residual, timestep_0, sample).prev_sample + output_1 = scheduler.step(residual, timestep_1, sample).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + # Override test_scheduler_outputs_equivalence to use EDMEulerScheduler-specific logic + def test_scheduler_outputs_equivalence(self): + def set_nan_tensor_to_zero(t): + t[t != t] = 0 + return t + + def recursive_check(tuple_object, dict_object): + if isinstance(tuple_object, (List, Tuple)): + for tuple_iterable_value, dict_iterable_value in zip(tuple_object, dict_object.values()): + recursive_check(tuple_iterable_value, dict_iterable_value) + elif isinstance(tuple_object, Dict): + for tuple_iterable_value, dict_iterable_value in zip(tuple_object.values(), dict_object.values()): + recursive_check(tuple_iterable_value, dict_iterable_value) + elif tuple_object is None: + return + else: + self.assertTrue( + torch.allclose( + set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5 + ), + msg=( + "Tuple and dict output are not equal. Difference:" + f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:" + f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object).any()}. Dict has" + f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object).any()}." 
+ ), + ) + + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", 50) + + timestep = 0 + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + timestep = scheduler.timesteps[0] + + sample = self.dummy_sample + scaled_sample = scheduler.scale_model_input(sample, timestep) + residual = 0.1 * scaled_sample + + # Set the seed before stepping, as some schedulers are stochastic (e.g. EulerAncestralDiscreteScheduler, EulerDiscreteScheduler) + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + outputs_dict = scheduler.step(residual, timestep, sample, **kwargs) + + scheduler.set_timesteps(num_inference_steps) + + scaled_sample = scheduler.scale_model_input(sample, timestep) + residual = 0.1 * scaled_sample + + # Set the seed before stepping, as some schedulers are stochastic (e.g. EulerAncestralDiscreteScheduler, EulerDiscreteScheduler) + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + outputs_tuple = scheduler.step(residual, timestep, sample, return_dict=False, **kwargs) + + recursive_check(outputs_tuple, outputs_dict) + + @unittest.skip(reason="EDMEulerScheduler does not support beta schedules.") + def test_trained_betas(self): + pass diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_euler.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_euler.py new file mode 100644 index 0000000000000000000000000000000000000000..ee99465abfc39e7a7ce7a6734b22dbeb42df5efd --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_euler.py @@ -0,0 +1,271 @@ +import torch + +from diffusers import EulerDiscreteScheduler + +from ..testing_utils import torch_device +from .test_schedulers import SchedulerCommonTest + + +class EulerDiscreteSchedulerTest(SchedulerCommonTest): + scheduler_classes = (EulerDiscreteScheduler,) + num_inference_steps = 10 + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1100, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.00001, 0.0001, 0.001], [0.0002, 0.002, 0.02]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_timestep_type(self): + timestep_types = ["discrete", "continuous"] + for timestep_type in timestep_types: + self.check_over_configs(timestep_type=timestep_type) + + def test_karras_sigmas(self): + self.check_over_configs(use_karras_sigmas=True, sigma_min=0.02, sigma_max=700.0) + + def test_rescale_betas_zero_snr(self): + for rescale_betas_zero_snr in [True, False]: + self.check_over_configs(rescale_betas_zero_snr=rescale_betas_zero_snr) + + def full_loop(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = 
scheduler_class(**scheduler_config) + + num_inference_steps = self.num_inference_steps + scheduler.set_timesteps(num_inference_steps) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + return sample + + def full_loop_custom_timesteps(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = self.num_inference_steps + scheduler.set_timesteps(num_inference_steps) + timesteps = scheduler.timesteps + # reset the timesteps using `timesteps` + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps=None, timesteps=timesteps) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + return sample + + def full_loop_custom_sigmas(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = self.num_inference_steps + scheduler.set_timesteps(num_inference_steps) + sigmas = scheduler.sigmas + # reset the timesteps using `sigmas` + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps=None, sigmas=sigmas) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + return sample + + def test_full_loop_no_noise(self): + sample = self.full_loop() + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 10.0807) < 1e-2 + assert abs(result_mean.item() - 0.0131) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 0.0002) < 1e-2 + assert abs(result_mean.item() - 2.2676e-06) < 1e-3 + + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma.cpu() + sample = sample.to(torch_device) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, 
generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 10.0807) < 1e-2 + assert abs(result_mean.item() - 0.0131) < 1e-3 + + def test_full_loop_device_karras_sigmas(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config, use_karras_sigmas=True) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma.cpu() + sample = sample.to(torch_device) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 124.52299499511719) < 1e-2 + assert abs(result_mean.item() - 0.16213932633399963) < 1e-3 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + # add noise + t_start = self.num_inference_steps - 2 + noise = self.dummy_noise_deter + noise = noise.to(sample.device) + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 57062.9297) < 1e-2, f" expected result sum 57062.9297, but got {result_sum}" + assert abs(result_mean.item() - 74.3007) < 1e-3, f" expected result mean 74.3007, but got {result_mean}" + + def test_custom_timesteps(self): + for prediction_type in ["epsilon", "sample", "v_prediction"]: + for interpolation_type in ["linear", "log_linear"]: + for final_sigmas_type in ["sigma_min", "zero"]: + sample = self.full_loop( + prediction_type=prediction_type, + interpolation_type=interpolation_type, + final_sigmas_type=final_sigmas_type, + ) + sample_custom_timesteps = self.full_loop_custom_timesteps( + prediction_type=prediction_type, + interpolation_type=interpolation_type, + final_sigmas_type=final_sigmas_type, + ) + assert torch.sum(torch.abs(sample - sample_custom_timesteps)) < 1e-5, ( + f"Scheduler outputs are not identical for prediction_type: {prediction_type}, interpolation_type: {interpolation_type} and final_sigmas_type: {final_sigmas_type}" + ) + + def test_custom_sigmas(self): + for prediction_type in ["epsilon", "sample", "v_prediction"]: + for final_sigmas_type in ["sigma_min", "zero"]: + sample = self.full_loop( + prediction_type=prediction_type, + final_sigmas_type=final_sigmas_type, + ) + sample_custom_timesteps = self.full_loop_custom_sigmas( + prediction_type=prediction_type, + final_sigmas_type=final_sigmas_type, + ) + assert torch.sum(torch.abs(sample - sample_custom_timesteps)) < 1e-5, ( 
f"Scheduler outputs are not identical for prediction_type: {prediction_type} and final_sigmas_type: {final_sigmas_type}" + ) + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_euler_ancestral.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_euler_ancestral.py new file mode 100644 index 0000000000000000000000000000000000000000..c4fe61bfc387ba1361800f6d660e761b9fdef7fd --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_euler_ancestral.py @@ -0,0 +1,156 @@ +import torch + +from diffusers import EulerAncestralDiscreteScheduler + +from ..testing_utils import torch_device +from .test_schedulers import SchedulerCommonTest + + +class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): + scheduler_classes = (EulerAncestralDiscreteScheduler,) + num_inference_steps = 10 + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1100, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.00001, 0.0001, 0.001], [0.0002, 0.002, 0.02]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_rescale_betas_zero_snr(self): + for rescale_betas_zero_snr in [True, False]: + self.check_over_configs(rescale_betas_zero_snr=rescale_betas_zero_snr) + + def test_full_loop_no_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma.cpu() + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 152.3192) < 1e-2 + assert abs(result_mean.item() - 0.1983) < 1e-3 + + def test_full_loop_with_v_prediction(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(prediction_type="v_prediction") + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + 
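+ # ancestral sampling draws fresh noise at every step, so the torch.manual_seed(0) generator above is what makes these reference values reproducible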
result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 108.4439) < 1e-2 + assert abs(result_mean.item() - 0.1412) < 1e-3 + + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma.cpu() + sample = sample.to(torch_device) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 152.3192) < 1e-2 + assert abs(result_mean.item() - 0.1983) < 1e-3 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + t_start = self.num_inference_steps - 2 + + scheduler.set_timesteps(self.num_inference_steps) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + # add noise + noise = self.dummy_noise_deter + noise = noise.to(sample.device) + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 56163.0508) < 1e-2, f" expected result sum 56163.0508, but got {result_sum}" + assert abs(result_mean.item() - 73.1290) < 1e-3, f" expected result mean 73.1290, but got {result_mean}" diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_heun.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_heun.py new file mode 100644 index 0000000000000000000000000000000000000000..97bef50048ba2011c106df5987af7368c033ee5a --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_heun.py @@ -0,0 +1,227 @@ +import torch + +from diffusers import HeunDiscreteScheduler + +from ..testing_utils import torch_device +from .test_schedulers import SchedulerCommonTest + + +class HeunDiscreteSchedulerTest(SchedulerCommonTest): + scheduler_classes = (HeunDiscreteScheduler,) + num_inference_steps = 10 + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1100, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.00001, 0.0001, 0.001], [0.0002, 0.002, 0.02]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear", "exp"]: + self.check_over_configs(beta_schedule=schedule) + + def test_clip_sample(self): + for clip_sample_range in [1.0, 2.0, 3.0]: 
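+ # clip_sample_range only takes effect when clipping is enabled, hence clip_sample=True is passed alongside each range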
self.check_over_configs(clip_sample_range=clip_sample_range, clip_sample=True) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction", "sample"]: + self.check_over_configs(prediction_type=prediction_type) + + def full_loop(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = self.num_inference_steps + scheduler.set_timesteps(num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + return sample + + def full_loop_custom_timesteps(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = self.num_inference_steps + scheduler.set_timesteps(num_inference_steps) + timesteps = scheduler.timesteps + timesteps = torch.cat([timesteps[:1], timesteps[1::2]]) + # reset the timesteps using `timesteps` + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps=None, timesteps=timesteps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + return sample + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["cpu", "mps"]: + assert abs(result_sum.item() - 0.1233) < 1e-2 + assert abs(result_mean.item() - 0.0002) < 1e-3 + else: + # CUDA + assert abs(result_sum.item() - 0.1233) < 1e-2 + assert abs(result_mean.item() - 0.0002) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["cpu", "mps"]: + assert abs(result_sum.item() - 4.6934e-07) < 1e-2 + assert abs(result_mean.item() - 6.1112e-10) < 1e-3 + else: + # CUDA + assert abs(result_sum.item() - 4.693428650170972e-07) < 1e-2 + assert abs(result_mean.item() - 0.0002) < 1e-3 + + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter.to(torch_device) * scheduler.init_noise_sigma + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if str(torch_device).startswith("cpu"): + # The following sum varies between 148 and 156 on mps. Why? 
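+ # (presumably the mps kernels are not bitwise reproducible here, which is why the mps branch below only checks the mean with a looser tolerance)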
+ assert abs(result_sum.item() - 0.1233) < 1e-2 + assert abs(result_mean.item() - 0.0002) < 1e-3 + elif str(torch_device).startswith("mps"): + # Larger tolerance on mps + assert abs(result_mean.item() - 0.0002) < 1e-2 + else: + # CUDA + assert abs(result_sum.item() - 0.1233) < 1e-2 + assert abs(result_mean.item() - 0.0002) < 1e-3 + + def test_full_loop_device_karras_sigmas(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config, use_karras_sigmas=True) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter.to(torch_device) * scheduler.init_noise_sigma + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 0.00015) < 1e-2 + assert abs(result_mean.item() - 1.9869554535034695e-07) < 1e-2 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + t_start = self.num_inference_steps - 2 + noise = self.dummy_noise_deter + noise = noise.to(torch_device) + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 75074.8906) < 1e-2, f" expected result sum 75074.8906, but got {result_sum}" + assert abs(result_mean.item() - 97.7538) < 1e-3, f" expected result mean 97.7538, but got {result_mean}" + + def test_custom_timesteps(self): + for prediction_type in ["epsilon", "sample", "v_prediction"]: + for timestep_spacing in ["linspace", "leading"]: + sample = self.full_loop( + prediction_type=prediction_type, + timestep_spacing=timestep_spacing, + ) + sample_custom_timesteps = self.full_loop_custom_timesteps( + prediction_type=prediction_type, + timestep_spacing=timestep_spacing, + ) + assert torch.sum(torch.abs(sample - sample_custom_timesteps)) < 1e-5, ( + f"Scheduler outputs are not identical for prediction_type: {prediction_type}, timestep_spacing: {timestep_spacing}" + ) + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ipndm.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ipndm.py new file mode 100644 index 0000000000000000000000000000000000000000..ac7973c582950816039e96e1ef4aad245d4cd3f1 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_ipndm.py @@ -0,0 +1,165 @@ +import tempfile +import unittest + +import torch + +from diffusers import IPNDMScheduler + +from 
.test_schedulers import SchedulerCommonTest + + +class IPNDMSchedulerTest(SchedulerCommonTest): + scheduler_classes = (IPNDMScheduler,) + forward_default_kwargs = (("num_inference_steps", 50),) + + def get_scheduler_config(self, **kwargs): + config = {"num_train_timesteps": 1000} + config.update(**kwargs) + return config + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.1, residual + 0.05] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + scheduler.ets = dummy_past_residuals[:] + + if time_step is None: + time_step = scheduler.timesteps[len(scheduler.timesteps) // 2] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + new_scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + new_scheduler.ets = dummy_past_residuals[:] + + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + @unittest.skip("Test not supported.") + def test_from_save_pretrained(self): + pass + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.1, residual + 0.05] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residuals (must be after setting timesteps) + scheduler.ets = dummy_past_residuals[:] + + if time_step is None: + time_step = scheduler.timesteps[len(scheduler.timesteps) // 2] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + # copy over dummy past residuals + new_scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residual (must be after setting timesteps) + new_scheduler.ets = dummy_past_residuals[:] + + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def full_loop(self, **config): + 
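+ # IPNDM keeps a history of past model outputs in scheduler.ets; full_loop below runs the loop twice, resetting _step_index in between, to exercise that history handling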
scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + scheduler._step_index = None + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + return sample + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # copy over dummy past residuals (must be done after set_timesteps) + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.1, residual + 0.05] + scheduler.ets = dummy_past_residuals[:] + + time_step_0 = scheduler.timesteps[5] + time_step_1 = scheduler.timesteps[6] + + output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_timesteps(self): + for timesteps in [100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps, time_step=None) + + def test_inference_steps(self): + for t, num_inference_steps in zip([1, 5, 10], [10, 50, 100]): + self.check_over_forward(num_inference_steps=num_inference_steps, time_step=None) + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 2540529) < 10 diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_kdpm2_ancestral.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_kdpm2_ancestral.py new file mode 100644 index 0000000000000000000000000000000000000000..135534db45361503fa5a7965e7a4c2640db33247 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_kdpm2_ancestral.py @@ -0,0 +1,164 @@ +import torch + +from diffusers import KDPM2AncestralDiscreteScheduler + +from ..testing_utils import torch_device +from .test_schedulers import SchedulerCommonTest + + +class KDPM2AncestralDiscreteSchedulerTest(SchedulerCommonTest): + scheduler_classes = (KDPM2AncestralDiscreteScheduler,) + num_inference_steps = 10 + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1100, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + 
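+ # time_step=None lets check_over_configs pick a timestep from the middle of the schedule instead of index 0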
self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.00001, 0.0001, 0.001], [0.0002, 0.002, 0.02]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear"]: + self.check_over_configs(beta_schedule=schedule) + + def test_full_loop_no_noise(self): + if torch_device == "mps": + return + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 13979.9433) < 1e-2 + assert abs(result_mean.item() - 18.2030) < 5e-3 + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_full_loop_with_v_prediction(self): + if torch_device == "mps": + return + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(prediction_type="v_prediction") + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + generator = torch.manual_seed(0) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 331.8133) < 1e-2 + assert abs(result_mean.item() - 0.4320) < 1e-3 + + def test_full_loop_device(self): + if torch_device == "mps": + return + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter.to(torch_device) * scheduler.init_noise_sigma + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 13979.9433) < 1e-1 + assert abs(result_mean.item() - 18.2030) < 1e-3 + + def test_full_loop_with_noise(self): + if torch_device == "mps": + return + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + generator = torch.manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * 
scheduler.init_noise_sigma + + # add noise + t_start = self.num_inference_steps - 2 + noise = self.dummy_noise_deter + noise = noise.to(sample.device) + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 93087.3437) < 1e-2, f"expected result sum 93087.3437, but got {result_sum}" + assert abs(result_mean.item() - 121.2074) < 5e-3, f"expected result mean 121.2074, but got {result_mean}" + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_kdpm2_discrete.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_kdpm2_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..370ba2253ee28ecd06ceaf911b2947343a9ee318 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_kdpm2_discrete.py @@ -0,0 +1,172 @@ +import torch + +from diffusers import KDPM2DiscreteScheduler + +from ..testing_utils import torch_device +from .test_schedulers import SchedulerCommonTest + + +class KDPM2DiscreteSchedulerTest(SchedulerCommonTest): + scheduler_classes = (KDPM2DiscreteScheduler,) + num_inference_steps = 10 + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1100, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.00001, 0.0001, 0.001], [0.0002, 0.002, 0.02]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_full_loop_with_v_prediction(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(prediction_type="v_prediction") + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["cpu", "mps"]: + assert abs(result_sum.item() - 4.6934e-07) < 1e-2 + assert abs(result_mean.item() - 6.1112e-10) < 1e-3 + else: + # CUDA + assert abs(result_sum.item() - 4.693428650170972e-07) < 1e-2 + assert abs(result_mean.item() - 0.0002) < 1e-3 + + def test_full_loop_no_noise(self): + if torch_device == "mps": + return + 
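# Reference sums/means below were presumably recorded on CPU/CUDA, hence the MPS skip above. + 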
scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["cpu", "mps"]: + assert abs(result_sum.item() - 20.4125) < 1e-2 + assert abs(result_mean.item() - 0.0266) < 1e-3 + else: + # CUDA + assert abs(result_sum.item() - 20.4125) < 1e-2 + assert abs(result_mean.item() - 0.0266) < 1e-3 + + def test_full_loop_device(self): + if torch_device == "mps": + return + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter.to(torch_device) * scheduler.init_noise_sigma + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if str(torch_device).startswith("cpu"): + # The following sum varies between 148 and 156 on mps. Why? + assert abs(result_sum.item() - 20.4125) < 1e-2 + assert abs(result_mean.item() - 0.0266) < 1e-3 + else: + # CUDA + assert abs(result_sum.item() - 20.4125) < 1e-2 + assert abs(result_mean.item() - 0.0266) < 1e-3 + + def test_full_loop_with_noise(self): + if torch_device == "mps": + return + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + # add noise + t_start = self.num_inference_steps - 2 + noise = self.dummy_noise_deter + noise = noise.to(sample.device) + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 70408.4062) < 1e-2, f"expected result sum 70408.4062, but got {result_sum}" + assert abs(result_mean.item() - 91.6776) < 1e-3, f"expected result mean 91.6776, but got {result_mean}" + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_lcm.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_lcm.py new file mode 100644 index 0000000000000000000000000000000000000000..f54970e0eba33baa0cbdc7d868a339c0f60bf189 --- /dev/null +++ 
b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_lcm.py @@ -0,0 +1,300 @@ +import tempfile +from typing import Dict, List, Tuple + +import torch + +from diffusers import LCMScheduler + +from ..testing_utils import torch_device +from .test_schedulers import SchedulerCommonTest + + +class LCMSchedulerTest(SchedulerCommonTest): + scheduler_classes = (LCMScheduler,) + forward_default_kwargs = (("num_inference_steps", 10),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.00085, + "beta_end": 0.0120, + "beta_schedule": "scaled_linear", + "prediction_type": "epsilon", + } + + config.update(**kwargs) + return config + + @property + def default_valid_timestep(self): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + + scheduler_config = self.get_scheduler_config() + scheduler = self.scheduler_classes[0](**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + timestep = scheduler.timesteps[-1] + return timestep + + def test_timesteps(self): + for timesteps in [100, 500, 1000]: + # 0 is not guaranteed to be in the timestep schedule, but timesteps - 1 is + self.check_over_configs(time_step=timesteps - 1, num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.0001, 0.001, 0.01, 0.1], [0.002, 0.02, 0.2, 2]): + self.check_over_configs(time_step=self.default_valid_timestep, beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear", "squaredcos_cap_v2"]: + self.check_over_configs(time_step=self.default_valid_timestep, beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(time_step=self.default_valid_timestep, prediction_type=prediction_type) + + def test_clip_sample(self): + for clip_sample in [True, False]: + self.check_over_configs(time_step=self.default_valid_timestep, clip_sample=clip_sample) + + def test_thresholding(self): + self.check_over_configs(time_step=self.default_valid_timestep, thresholding=False) + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs( + time_step=self.default_valid_timestep, + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + ) + + def test_time_indices(self): + # Get default timestep schedule. 
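+ # Every timestep of the default schedule is then exercised via check_over_forward. 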
+ kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + + scheduler_config = self.get_scheduler_config() + scheduler = self.scheduler_classes[0](**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + timesteps = scheduler.timesteps + for t in timesteps: + self.check_over_forward(time_step=t) + + def test_inference_steps(self): + # Hardcoded for now + for t, num_inference_steps in zip([99, 39, 39, 19], [10, 25, 26, 50]): + self.check_over_forward(time_step=t, num_inference_steps=num_inference_steps) + + # Override test_add_noise_device because the hardcoded num_inference_steps of 100 doesn't work + # for LCMScheduler under default settings + def test_add_noise_device(self, num_inference_steps=10): + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + + sample = self.dummy_sample.to(torch_device) + scaled_sample = scheduler.scale_model_input(sample, 0.0) + self.assertEqual(sample.shape, scaled_sample.shape) + + noise = torch.randn(scaled_sample.shape).to(torch_device) + t = scheduler.timesteps[5][None] + noised = scheduler.add_noise(scaled_sample, noise, t) + self.assertEqual(noised.shape, scaled_sample.shape) + + # Override test_from_save_pretrained because it hardcodes a timestep of 1 + def test_from_save_pretrained(self): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + timestep = self.default_valid_timestep + + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + + scheduler.set_timesteps(num_inference_steps) + new_scheduler.set_timesteps(num_inference_steps) + + kwargs["generator"] = torch.manual_seed(0) + output = scheduler.step(residual, timestep, sample, **kwargs).prev_sample + + kwargs["generator"] = torch.manual_seed(0) + new_output = new_scheduler.step(residual, timestep, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + # Override test_step_shape because it uses 0 and 1 as hardcoded timesteps + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + scheduler.set_timesteps(num_inference_steps) + + timestep_0 = scheduler.timesteps[-2] + timestep_1 = scheduler.timesteps[-1] + + output_0 = scheduler.step(residual, timestep_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, timestep_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + # Override test_scheduler_outputs_equivalence since it uses 0 as a hardcoded timestep + def test_scheduler_outputs_equivalence(self): + def set_nan_tensor_to_zero(t): + t[t != t] = 0 + return t + + def recursive_check(tuple_object, dict_object): + if isinstance(tuple_object, (List, Tuple)): + for 
tuple_iterable_value, dict_iterable_value in zip(tuple_object, dict_object.values()): + recursive_check(tuple_iterable_value, dict_iterable_value) + elif isinstance(tuple_object, Dict): + for tuple_iterable_value, dict_iterable_value in zip(tuple_object.values(), dict_object.values()): + recursive_check(tuple_iterable_value, dict_iterable_value) + elif tuple_object is None: + return + else: + self.assertTrue( + torch.allclose( + set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5 + ), + msg=( + "Tuple and dict output are not equal. Difference:" + f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:" + f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object).any()}. Dict has" + f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object).any()}." + ), + ) + + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", 50) + + timestep = self.default_valid_timestep + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + scheduler.set_timesteps(num_inference_steps) + kwargs["generator"] = torch.manual_seed(0) + outputs_dict = scheduler.step(residual, timestep, sample, **kwargs) + + scheduler.set_timesteps(num_inference_steps) + kwargs["generator"] = torch.manual_seed(0) + outputs_tuple = scheduler.step(residual, timestep, sample, return_dict=False, **kwargs) + + recursive_check(outputs_tuple, outputs_dict) + + def full_loop(self, num_inference_steps=10, seed=0, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + model = self.dummy_model() + sample = self.dummy_sample_deter + generator = torch.manual_seed(seed) + + scheduler.set_timesteps(num_inference_steps) + + for t in scheduler.timesteps: + residual = model(sample, t) + sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + + return sample + + def test_full_loop_onestep(self): + sample = self.full_loop(num_inference_steps=1) + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 18.7097) < 1e-3 + assert abs(result_mean.item() - 0.0244) < 1e-3 + + def test_full_loop_multistep(self): + sample = self.full_loop(num_inference_steps=10) + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 197.7616) < 1e-3 + assert abs(result_mean.item() - 0.2575) < 1e-3 + + def test_custom_timesteps(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [100, 87, 50, 1, 0] + + scheduler.set_timesteps(timesteps=timesteps) + + scheduler_timesteps = scheduler.timesteps + + for i, timestep in enumerate(scheduler_timesteps): + if i == len(timesteps) - 1: + expected_prev_t = -1 + else: + expected_prev_t = timesteps[i + 1] + + prev_t = scheduler.previous_timestep(timestep) + prev_t = prev_t.item() + + self.assertEqual(prev_t, expected_prev_t) + + def test_custom_timesteps_increasing_order(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + 
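# 51 follows 50 below, so the schedule is not strictly descending and must be rejected. + 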
timesteps = [100, 87, 50, 51, 0] + + with self.assertRaises(ValueError, msg="`custom_timesteps` must be in descending order."): + scheduler.set_timesteps(timesteps=timesteps) + + def test_custom_timesteps_passing_both_num_inference_steps_and_timesteps(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [100, 87, 50, 1, 0] + num_inference_steps = len(timesteps) + + with self.assertRaises(ValueError, msg="Can only pass one of `num_inference_steps` or `custom_timesteps`."): + scheduler.set_timesteps(num_inference_steps=num_inference_steps, timesteps=timesteps) + + def test_custom_timesteps_too_large(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [scheduler.config.num_train_timesteps] + + with self.assertRaises( + ValueError, + msg=f"`timesteps` must start before `self.config.num_train_timesteps`: {scheduler.config.num_train_timesteps}", + ): + scheduler.set_timesteps(timesteps=timesteps) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_lms.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_lms.py new file mode 100644 index 0000000000000000000000000000000000000000..c4abca3ac973dc00870efd564e66bcdf53c2bd6e --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_lms.py @@ -0,0 +1,176 @@ +import torch + +from diffusers import LMSDiscreteScheduler + +from ..testing_utils import torch_device +from .test_schedulers import SchedulerCommonTest + + +class LMSDiscreteSchedulerTest(SchedulerCommonTest): + scheduler_classes = (LMSDiscreteScheduler,) + num_inference_steps = 10 + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1100, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.00001, 0.0001, 0.001], [0.0002, 0.002, 0.02]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_time_indices(self): + for t in [0, 500, 800]: + self.check_over_forward(time_step=t) + + def test_full_loop_no_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 1006.388) < 1e-2 + assert abs(result_mean.item() - 1.31) < 1e-3 + + def test_full_loop_with_v_prediction(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = 
self.get_scheduler_config(prediction_type="v_prediction") + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 0.0017) < 1e-2 + assert abs(result_mean.item() - 2.2676e-06) < 1e-3 + + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma.cpu() + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 1006.388) < 1e-2 + assert abs(result_mean.item() - 1.31) < 1e-3 + + def test_full_loop_device_karras_sigmas(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config, use_karras_sigmas=True) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter.to(torch_device) * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 3812.9927) < 2e-2 + assert abs(result_mean.item() - 4.9648) < 1e-3 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + + # add noise + t_start = self.num_inference_steps - 2 + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 27663.6895) < 1e-2 + assert abs(result_mean.item() - 36.0204) < 1e-3 + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_pndm.py 
b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_pndm.py new file mode 100644 index 0000000000000000000000000000000000000000..13c6904682223de2382b2d0d7b38e4a435f97988 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_pndm.py @@ -0,0 +1,244 @@ +import tempfile +import unittest + +import torch + +from diffusers import PNDMScheduler + +from .test_schedulers import SchedulerCommonTest + + +class PNDMSchedulerTest(SchedulerCommonTest): + scheduler_classes = (PNDMScheduler,) + forward_default_kwargs = (("num_inference_steps", 50),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + } + + config.update(**kwargs) + return config + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.1, residual + 0.05] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + scheduler.ets = dummy_past_residuals[:] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + new_scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + new_scheduler.ets = dummy_past_residuals[:] + + output = scheduler.step_prk(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step_prk(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + output = scheduler.step_plms(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step_plms(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + @unittest.skip("Test not supported.") + def test_from_save_pretrained(self): + pass + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.1, residual + 0.05] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residuals (must be after setting timesteps) + scheduler.ets = dummy_past_residuals[:] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + # copy over dummy past residuals + new_scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residual (must be after setting timesteps) + new_scheduler.ets = dummy_past_residuals[:] + + output = scheduler.step_prk(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step_prk(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" 
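+ # The PLMS branch is then exercised with the same residual/sample pair as the PRK branch. 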
+ + output = scheduler.step_plms(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step_plms(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def full_loop(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.prk_timesteps): + residual = model(sample, t) + sample = scheduler.step_prk(residual, t, sample).prev_sample + + for i, t in enumerate(scheduler.plms_timesteps): + residual = model(sample, t) + sample = scheduler.step_plms(residual, t, sample).prev_sample + + return sample + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # copy over dummy past residuals (must be done after set_timesteps) + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.1, residual + 0.05] + scheduler.ets = dummy_past_residuals[:] + + output_0 = scheduler.step_prk(residual, 0, sample, **kwargs).prev_sample + output_1 = scheduler.step_prk(residual, 1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + output_0 = scheduler.step_plms(residual, 0, sample, **kwargs).prev_sample + output_1 = scheduler.step_plms(residual, 1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_timesteps(self): + for timesteps in [100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_steps_offset(self): + for steps_offset in [0, 1]: + self.check_over_configs(steps_offset=steps_offset) + + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(steps_offset=1) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(10) + assert torch.equal( + scheduler.timesteps, + torch.LongTensor( + [901, 851, 851, 801, 801, 751, 751, 701, 701, 651, 651, 601, 601, 501, 401, 301, 201, 101, 1] + ), + ) + + def test_betas(self): + for beta_start, beta_end in zip([0.0001, 0.001], [0.002, 0.02]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "squaredcos_cap_v2"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_time_indices(self): + for t in [1, 5, 10]: + self.check_over_forward(time_step=t) + + def test_inference_steps(self): + for t, num_inference_steps in zip([1, 5, 10], [10, 50, 100]): + self.check_over_forward(num_inference_steps=num_inference_steps) + + def 
test_pow_of_3_inference_steps(self): + # an earlier version of set_timesteps() raised an indexing error on alphas when the number of inference steps was a power of 3 + num_inference_steps = 27 + + for scheduler_class in self.scheduler_classes: + sample = self.dummy_sample + residual = 0.1 * sample + + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + + # before power of 3 fix, would error on first step, so we only need to do two + for i, t in enumerate(scheduler.prk_timesteps[:2]): + sample = scheduler.step_prk(residual, t, sample).prev_sample + + def test_inference_plms_no_past_residuals(self): + with self.assertRaises(ValueError): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.step_plms(self.dummy_sample, 1, self.dummy_sample).prev_sample + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 198.1318) < 1e-2 + assert abs(result_mean.item() - 0.2580) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 67.3986) < 1e-2 + assert abs(result_mean.item() - 0.0878) < 1e-3 + + def test_full_loop_with_set_alpha_to_one(self): + # We specify a different beta so that the first alpha is 0.99 + sample = self.full_loop(set_alpha_to_one=True, beta_start=0.01) + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 230.0399) < 1e-2 + assert abs(result_mean.item() - 0.2995) < 1e-3 + + def test_full_loop_with_no_set_alpha_to_one(self): + # We specify a different beta so that the first alpha is 0.99 + sample = self.full_loop(set_alpha_to_one=False, beta_start=0.01) + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 186.9482) < 1e-2 + assert abs(result_mean.item() - 0.2434) < 1e-3 diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_sasolver.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_sasolver.py new file mode 100644 index 0000000000000000000000000000000000000000..2c2d2c0397bb620bbf556ac8d4701eb17494e36b --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_sasolver.py @@ -0,0 +1,200 @@ +import torch + +from diffusers import SASolverScheduler + +from ..testing_utils import require_torchsde, torch_device +from .test_schedulers import SchedulerCommonTest + + +@require_torchsde +class SASolverSchedulerTest(SchedulerCommonTest): + scheduler_classes = (SASolverScheduler,) + forward_default_kwargs = (("num_inference_steps", 10),) + num_inference_steps = 10 + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1100, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + } + + config.update(**kwargs) + return config + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample 
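+ # SA-Solver keeps max(predictor_order, corrector_order - 1) past model outputs; they are seeded below. 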
+ + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # copy over dummy past residuals (must be done after set_timesteps) + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + scheduler.model_outputs = dummy_past_residuals[ + : max( + scheduler.config.predictor_order, + scheduler.config.corrector_order - 1, + ) + ] + + time_step_0 = scheduler.timesteps[5] + time_step_1 = scheduler.timesteps[6] + + output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_timesteps(self): + for timesteps in [10, 50, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.00001, 0.0001, 0.001], [0.0002, 0.002, 0.02]): + self.check_over_configs(beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear"]: + self.check_over_configs(beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_full_loop_no_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + generator = torch.manual_seed(0) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t, generator=generator) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["cpu"]: + assert abs(result_sum.item() - 337.394287109375) < 1e-2 + assert abs(result_mean.item() - 0.43931546807289124) < 1e-3 + elif torch_device in ["cuda"]: + assert abs(result_sum.item() - 329.1999816894531) < 1e-2 + assert abs(result_mean.item() - 0.4286458194255829) < 1e-3 + + def test_full_loop_with_v_prediction(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(prediction_type="v_prediction") + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + generator = torch.manual_seed(0) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t, generator=generator) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["cpu"]: + assert abs(result_sum.item() - 193.1467742919922) < 1e-2 + assert abs(result_mean.item() - 0.2514931857585907) < 1e-3 + elif torch_device in ["cuda"]: + assert abs(result_sum.item() - 193.4154052734375) < 1e-2 + assert 
abs(result_mean.item() - 0.2518429756164551) < 1e-3 + + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter.to(torch_device) * scheduler.init_noise_sigma + generator = torch.manual_seed(0) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["cpu"]: + assert abs(result_sum.item() - 337.394287109375) < 1e-2 + assert abs(result_mean.item() - 0.43931546807289124) < 1e-3 + elif torch_device in ["cuda"]: + assert abs(result_sum.item() - 337.394287109375) < 1e-2 + assert abs(result_mean.item() - 0.4393154978752136) < 1e-3 + + def test_full_loop_device_karras_sigmas(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config, use_karras_sigmas=True) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter.to(torch_device) * scheduler.init_noise_sigma + sample = sample.to(torch_device) + generator = torch.manual_seed(0) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + if torch_device in ["cpu"]: + assert abs(result_sum.item() - 837.2554931640625) < 1e-2 + assert abs(result_mean.item() - 1.0901764631271362) < 1e-2 + elif torch_device in ["cuda"]: + assert abs(result_sum.item() - 837.25537109375) < 1e-2 + assert abs(result_mean.item() - 1.0901763439178467) < 1e-2 + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_score_sde_ve.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_score_sde_ve.py new file mode 100644 index 0000000000000000000000000000000000000000..08c30f9b1e0c2ce1f7baab82f5076efabe465a69 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_score_sde_ve.py @@ -0,0 +1,189 @@ +import tempfile +import unittest + +import numpy as np +import torch + +from diffusers import ScoreSdeVeScheduler + + +class ScoreSdeVeSchedulerTest(unittest.TestCase): + # TODO adapt with class SchedulerCommonTest (scheduler needs Numpy Integration) + scheduler_classes = (ScoreSdeVeScheduler,) + forward_default_kwargs = () + + @property + def dummy_sample(self): + batch_size = 4 + num_channels = 3 + height = 8 + width = 8 + + sample = torch.rand((batch_size, num_channels, height, width)) + + return sample + + @property + def dummy_sample_deter(self): + batch_size = 4 + num_channels = 3 + height = 8 + width = 8 + + num_elems = batch_size * num_channels * height * width + sample = torch.arange(num_elems) + sample = sample.reshape(num_channels, height, width, batch_size) + sample = sample / num_elems + 
sample = sample.permute(3, 0, 1, 2) + + return sample + + def dummy_model(self): + def model(sample, t, *args): + return sample * t / (t + 1) + + return model + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 2000, + "snr": 0.15, + "sigma_min": 0.01, + "sigma_max": 1348, + "sampling_eps": 1e-5, + } + + config.update(**kwargs) + return config + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + + for scheduler_class in self.scheduler_classes: + sample = self.dummy_sample + residual = 0.1 * sample + + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + + output = scheduler.step_pred( + residual, time_step, sample, generator=torch.manual_seed(0), **kwargs + ).prev_sample + new_output = new_scheduler.step_pred( + residual, time_step, sample, generator=torch.manual_seed(0), **kwargs + ).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + output = scheduler.step_correct(residual, sample, generator=torch.manual_seed(0), **kwargs).prev_sample + new_output = new_scheduler.step_correct( + residual, sample, generator=torch.manual_seed(0), **kwargs + ).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler correction are not identical" + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = dict(self.forward_default_kwargs) + kwargs.update(forward_kwargs) + + for scheduler_class in self.scheduler_classes: + sample = self.dummy_sample + residual = 0.1 * sample + + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + + output = scheduler.step_pred( + residual, time_step, sample, generator=torch.manual_seed(0), **kwargs + ).prev_sample + new_output = new_scheduler.step_pred( + residual, time_step, sample, generator=torch.manual_seed(0), **kwargs + ).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + output = scheduler.step_correct(residual, sample, generator=torch.manual_seed(0), **kwargs).prev_sample + new_output = new_scheduler.step_correct( + residual, sample, generator=torch.manual_seed(0), **kwargs + ).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler correction are not identical" + + def test_timesteps(self): + for timesteps in [10, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_sigmas(self): + for sigma_min, sigma_max in zip([0.0001, 0.001, 0.01], [1, 100, 1000]): + self.check_over_configs(sigma_min=sigma_min, sigma_max=sigma_max) + + def test_time_indices(self): + for t in [0.1, 0.5, 0.75]: + self.check_over_forward(time_step=t) + + def test_full_loop_no_noise(self): + kwargs = dict(self.forward_default_kwargs) + + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 3 + + model = self.dummy_model() + sample = self.dummy_sample_deter + + scheduler.set_sigmas(num_inference_steps) + scheduler.set_timesteps(num_inference_steps) + generator = torch.manual_seed(0) + 
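# Predictor-corrector sampling: each timestep runs `correct_steps` corrector updates + # (step_correct) followed by a single predictor update (step_pred). + 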
+ for i, t in enumerate(scheduler.timesteps): + sigma_t = scheduler.sigmas[i] + + for _ in range(scheduler.config.correct_steps): + with torch.no_grad(): + model_output = model(sample, sigma_t) + sample = scheduler.step_correct(model_output, sample, generator=generator, **kwargs).prev_sample + + with torch.no_grad(): + model_output = model(sample, sigma_t) + + output = scheduler.step_pred(model_output, t, sample, generator=generator, **kwargs) + sample, _ = output.prev_sample, output.prev_sample_mean + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert np.isclose(result_sum.item(), 14372758528.0) + assert np.isclose(result_mean.item(), 18714530.0) + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + output_0 = scheduler.step_pred(residual, 0, sample, generator=torch.manual_seed(0), **kwargs).prev_sample + output_1 = scheduler.step_pred(residual, 1, sample, generator=torch.manual_seed(0), **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_tcd.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_tcd.py new file mode 100644 index 0000000000000000000000000000000000000000..e95c536c7f0a279df379d872a335c1e6b30d6ccc --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_tcd.py @@ -0,0 +1,180 @@ +import torch + +from diffusers import TCDScheduler + +from .test_schedulers import SchedulerCommonTest + + +class TCDSchedulerTest(SchedulerCommonTest): + scheduler_classes = (TCDScheduler,) + forward_default_kwargs = (("num_inference_steps", 10),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.00085, + "beta_end": 0.0120, + "beta_schedule": "scaled_linear", + "prediction_type": "epsilon", + } + + config.update(**kwargs) + return config + + @property + def default_num_inference_steps(self): + return 10 + + @property + def default_valid_timestep(self): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + + scheduler_config = self.get_scheduler_config() + scheduler = self.scheduler_classes[0](**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + timestep = scheduler.timesteps[-1] + return timestep + + def test_timesteps(self): + for timesteps in [100, 500, 1000]: + # 0 is not guaranteed to be in the timestep schedule, but timesteps - 1 is + self.check_over_configs(time_step=timesteps - 1, num_train_timesteps=timesteps) + + def test_betas(self): + for beta_start, beta_end in zip([0.0001, 0.001, 0.01, 0.1], [0.002, 0.02, 0.2, 2]): + self.check_over_configs(time_step=self.default_valid_timestep, beta_start=beta_start, beta_end=beta_end) + + def test_schedules(self): + for schedule in ["linear", "scaled_linear", "squaredcos_cap_v2"]: + 
self.check_over_configs(time_step=self.default_valid_timestep, beta_schedule=schedule) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(time_step=self.default_valid_timestep, prediction_type=prediction_type) + + def test_clip_sample(self): + for clip_sample in [True, False]: + self.check_over_configs(time_step=self.default_valid_timestep, clip_sample=clip_sample) + + def test_thresholding(self): + self.check_over_configs(time_step=self.default_valid_timestep, thresholding=False) + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs( + time_step=self.default_valid_timestep, + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + ) + + def test_time_indices(self): + # Get default timestep schedule. + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + + scheduler_config = self.get_scheduler_config() + scheduler = self.scheduler_classes[0](**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + timesteps = scheduler.timesteps + for t in timesteps: + self.check_over_forward(time_step=t) + + def test_inference_steps(self): + # Hardcoded for now + for t, num_inference_steps in zip([99, 39, 39, 19], [10, 25, 26, 50]): + self.check_over_forward(time_step=t, num_inference_steps=num_inference_steps) + + def full_loop(self, num_inference_steps=10, seed=0, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + eta = 0.0 # corresponds to gamma in the TCD paper + + model = self.dummy_model() + sample = self.dummy_sample_deter + generator = torch.manual_seed(seed) + scheduler.set_timesteps(num_inference_steps) + + for t in scheduler.timesteps: + residual = model(sample, t) + sample = scheduler.step(residual, t, sample, eta, generator).prev_sample + + return sample + + def test_full_loop_onestep_deter(self): + sample = self.full_loop(num_inference_steps=1) + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 29.8715) < 1e-3 + assert abs(result_mean.item() - 0.0389) < 1e-3 + + def test_full_loop_multistep_deter(self): + sample = self.full_loop(num_inference_steps=10) + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 181.2040) < 1e-3 + assert abs(result_mean.item() - 0.2359) < 1e-3 + + def test_custom_timesteps(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [100, 87, 50, 1, 0] + + scheduler.set_timesteps(timesteps=timesteps) + + scheduler_timesteps = scheduler.timesteps + + for i, timestep in enumerate(scheduler_timesteps): + if i == len(timesteps) - 1: + expected_prev_t = -1 + else: + expected_prev_t = timesteps[i + 1] + + prev_t = scheduler.previous_timestep(timestep) + prev_t = prev_t.item() + + self.assertEqual(prev_t, expected_prev_t) + + def test_custom_timesteps_increasing_order(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [100, 87, 50, 51, 0] + + with self.assertRaises(ValueError, msg="`custom_timesteps` must be in descending order."): + 
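# As in the LCM test, the 50 -> 51 inversion above should trigger the ValueError. + 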
scheduler.set_timesteps(timesteps=timesteps) + + def test_custom_timesteps_passing_both_num_inference_steps_and_timesteps(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [100, 87, 50, 1, 0] + num_inference_steps = len(timesteps) + + with self.assertRaises(ValueError, msg="Can only pass one of `num_inference_steps` or `custom_timesteps`."): + scheduler.set_timesteps(num_inference_steps=num_inference_steps, timesteps=timesteps) + + def test_custom_timesteps_too_large(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = [scheduler.config.num_train_timesteps] + + with self.assertRaises( + ValueError, + msg=f"`timesteps` must start before `self.config.num_train_timesteps`: {scheduler.config.num_train_timesteps}", + ): + scheduler.set_timesteps(timesteps=timesteps) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_unclip.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_unclip.py new file mode 100644 index 0000000000000000000000000000000000000000..9e66a328f42e228a9ed02f6ca52d07f7e8944d64 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_unclip.py @@ -0,0 +1,141 @@ +import unittest + +import torch + +from diffusers import UnCLIPScheduler + +from .test_schedulers import SchedulerCommonTest + + +# UnCLIPScheduler is a modified DDPMScheduler with a subset of the configuration. +class UnCLIPSchedulerTest(SchedulerCommonTest): + scheduler_classes = (UnCLIPScheduler,) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "variance_type": "fixed_small_log", + "clip_sample": True, + "clip_sample_range": 1.0, + "prediction_type": "epsilon", + } + + config.update(**kwargs) + return config + + def test_timesteps(self): + for timesteps in [1, 5, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_variance_type(self): + for variance in ["fixed_small_log", "learned_range"]: + self.check_over_configs(variance_type=variance) + + def test_clip_sample(self): + for clip_sample in [True, False]: + self.check_over_configs(clip_sample=clip_sample) + + def test_clip_sample_range(self): + for clip_sample_range in [1, 5, 10, 20]: + self.check_over_configs(clip_sample_range=clip_sample_range) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "sample"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_time_indices(self): + for time_step in [0, 500, 999]: + for prev_timestep in [None, 5, 100, 250, 500, 750]: + if prev_timestep is not None and prev_timestep >= time_step: + continue + + self.check_over_forward(time_step=time_step, prev_timestep=prev_timestep) + + def test_variance_fixed_small_log(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(variance_type="fixed_small_log") + scheduler = scheduler_class(**scheduler_config) + + assert torch.sum(torch.abs(scheduler._get_variance(0) - 1.0000e-10)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(487) - 0.0549625)) < 1e-5 + assert torch.sum(torch.abs(scheduler._get_variance(999) - 0.9994987)) < 1e-5 + + def test_variance_learned_range(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(variance_type="learned_range") + scheduler = 
scheduler_class(**scheduler_config) + + predicted_variance = 0.5 + + assert abs(scheduler._get_variance(1, predicted_variance=predicted_variance) - -10.1712790) < 1e-5 + assert abs(scheduler._get_variance(487, predicted_variance=predicted_variance) - -5.7998052) < 1e-5 + assert abs(scheduler._get_variance(999, predicted_variance=predicted_variance) - -0.0010011) < 1e-5 + + def test_full_loop(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + timesteps = scheduler.timesteps + + model = self.dummy_model() + sample = self.dummy_sample_deter + generator = torch.manual_seed(0) + + for i, t in enumerate(timesteps): + # 1. predict noise residual + residual = model(sample, t) + + # 2. predict previous mean of sample x_t-1 + pred_prev_sample = scheduler.step(residual, t, sample, generator=generator).prev_sample + + sample = pred_prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 252.2682495) < 1e-2 + assert abs(result_mean.item() - 0.3284743) < 1e-3 + + def test_full_loop_skip_timesteps(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(25) + + timesteps = scheduler.timesteps + + model = self.dummy_model() + sample = self.dummy_sample_deter + generator = torch.manual_seed(0) + + for i, t in enumerate(timesteps): + # 1. predict noise residual + residual = model(sample, t) + + if i + 1 == timesteps.shape[0]: + prev_timestep = None + else: + prev_timestep = timesteps[i + 1] + + # 2. predict previous mean of sample x_t-1 + pred_prev_sample = scheduler.step( + residual, t, sample, prev_timestep=prev_timestep, generator=generator + ).prev_sample + + sample = pred_prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 258.2044983) < 1e-2 + assert abs(result_mean.item() - 0.3362038) < 1e-3 + + @unittest.skip("Test not supported.") + def test_trained_betas(self): + pass + + @unittest.skip("Test not supported.") + def test_add_noise_device(self): + pass diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_unipc.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_unipc.py new file mode 100644 index 0000000000000000000000000000000000000000..197c831cb015e55e329edde67bc6f8014c53c42a --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_unipc.py @@ -0,0 +1,401 @@ +import tempfile + +import torch + +from diffusers import ( + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + DPMSolverSinglestepScheduler, + UniPCMultistepScheduler, +) + +from .test_schedulers import SchedulerCommonTest + + +class UniPCMultistepSchedulerTest(SchedulerCommonTest): + scheduler_classes = (UniPCMultistepScheduler,) + forward_default_kwargs = (("num_inference_steps", 25),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "solver_order": 2, + "solver_type": "bh2", + "final_sigmas_type": "sigma_min", + } + + config.update(**kwargs) + return config + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + 
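# UniPC is a multistep solver: pre-populating model_outputs with dummy residuals exercises the higher-order update path +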
dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + new_scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + output, new_output = sample, sample + for t in range(time_step, time_step + scheduler.config.solver_order + 1): + t = scheduler.timesteps[t] + output = scheduler.step(residual, t, output, **kwargs).prev_sample + new_output = new_scheduler.step(residual, t, new_output, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residuals (must be after setting timesteps) + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + new_scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residuals (must be after setting timesteps) + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def full_loop(self, scheduler=None, **config): + if scheduler is None: + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + return sample + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + 
scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # copy over dummy past residuals (must be done after set_timesteps) + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + time_step_0 = scheduler.timesteps[5] + time_step_1 = scheduler.timesteps[6] + + output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_switch(self): + # make sure that iterating over schedulers with same config names gives same results + # for defaults + scheduler = UniPCMultistepScheduler(**self.get_scheduler_config()) + sample = self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2464) < 1e-3 + + scheduler = DPMSolverSinglestepScheduler.from_config(scheduler.config) + scheduler = DEISMultistepScheduler.from_config(scheduler.config) + scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config) + scheduler = UniPCMultistepScheduler.from_config(scheduler.config) + + sample = self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2464) < 1e-3 + + def test_timesteps(self): + for timesteps in [25, 50, 100, 999, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for order in [1, 2, 3]: + for solver_type in ["bh1", "bh2"]: + for threshold in [0.5, 1.0, 2.0]: + for prediction_type in ["epsilon", "sample"]: + self.check_over_configs( + thresholding=True, + prediction_type=prediction_type, + sample_max_value=threshold, + solver_order=order, + solver_type=solver_type, + ) + + def test_prediction_type(self): + for prediction_type in ["epsilon", "v_prediction"]: + self.check_over_configs(prediction_type=prediction_type) + + def test_rescale_betas_zero_snr(self): + for rescale_betas_zero_snr in [True, False]: + self.check_over_configs(rescale_betas_zero_snr=rescale_betas_zero_snr) + + def test_solver_order_and_type(self): + for solver_type in ["bh1", "bh2"]: + for order in [1, 2, 3]: + for prediction_type in ["epsilon", "sample"]: + self.check_over_configs( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + ) + sample = self.full_loop( + solver_order=order, + solver_type=solver_type, + prediction_type=prediction_type, + ) + assert not torch.isnan(sample).any(), "Samples have nan numbers" + + def test_lower_order_final(self): + self.check_over_configs(lower_order_final=True) + self.check_over_configs(lower_order_final=False) + + def test_inference_steps(self): + for num_inference_steps in [1, 2, 3, 5, 10, 50, 100, 999, 1000]: + self.check_over_forward(num_inference_steps=num_inference_steps, time_step=0) + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2464) < 1e-3 + + def test_full_loop_with_karras(self): + sample = self.full_loop(use_karras_sigmas=True) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2925) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = 
self.full_loop(prediction_type="v_prediction") + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.1014) < 1e-3 + + def test_full_loop_with_karras_and_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction", use_karras_sigmas=True) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.1966) < 1e-3 + + def test_fp16_support(self): + for order in [1, 2, 3]: + for solver_type in ["bh1", "bh2"]: + for prediction_type in ["epsilon", "sample", "v_prediction"]: + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config( + thresholding=True, + dynamic_thresholding_ratio=0, + prediction_type=prediction_type, + solver_order=order, + solver_type=solver_type, + ) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter.half() + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + assert sample.dtype == torch.float16 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + t_start = 8 + + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + # add noise + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 315.5757) < 1e-2, f"expected result sum 315.5757, but got {result_sum}" + assert abs(result_mean.item() - 0.4109) < 1e-3, f"expected result mean 0.4109, but got {result_mean}" + + +class UniPCMultistepScheduler1DTest(UniPCMultistepSchedulerTest): + @property + def dummy_sample(self): + batch_size = 4 + num_channels = 3 + width = 8 + + sample = torch.rand((batch_size, num_channels, width)) + + return sample + + @property + def dummy_noise_deter(self): + batch_size = 4 + num_channels = 3 + width = 8 + + num_elems = batch_size * num_channels * width + sample = torch.arange(num_elems).flip(-1) + sample = sample.reshape(num_channels, width, batch_size) + sample = sample / num_elems + sample = sample.permute(2, 0, 1) + + return sample + + @property + def dummy_sample_deter(self): + batch_size = 4 + num_channels = 3 + width = 8 + + num_elems = batch_size * num_channels * width + sample = torch.arange(num_elems) + sample = sample.reshape(num_channels, width, batch_size) + sample = sample / num_elems + sample = sample.permute(2, 0, 1) + + return sample + + def test_switch(self): + # make sure that iterating over schedulers with same config names gives same results + # for defaults + scheduler = UniPCMultistepScheduler(**self.get_scheduler_config()) + sample = self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2441) < 1e-3 + + scheduler = DPMSolverSinglestepScheduler.from_config(scheduler.config) + scheduler = DEISMultistepScheduler.from_config(scheduler.config) + scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config) + scheduler = 
UniPCMultistepScheduler.from_config(scheduler.config) + + sample = self.full_loop(scheduler=scheduler) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2441) < 1e-3 + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2441) < 1e-3 + + def test_full_loop_with_karras(self): + sample = self.full_loop(use_karras_sigmas=True) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.2898) < 1e-3 + + def test_full_loop_with_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction") + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.1014) < 1e-3 + + def test_full_loop_with_karras_and_v_prediction(self): + sample = self.full_loop(prediction_type="v_prediction", use_karras_sigmas=True) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.1944) < 1e-3 + + def test_full_loop_with_noise(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + t_start = 8 + + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + # add noise + noise = self.dummy_noise_deter + timesteps = scheduler.timesteps[t_start * scheduler.order :] + sample = scheduler.add_noise(sample, noise, timesteps[:1]) + + for i, t in enumerate(timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 39.0870) < 1e-2, f"expected result sum 39.0870, but got {result_sum}" + assert abs(result_mean.item() - 0.4072) < 1e-3, f"expected result mean 0.4072, but got {result_mean}" + + def test_beta_sigmas(self): + self.check_over_configs(use_beta_sigmas=True) + + def test_exponential_sigmas(self): + self.check_over_configs(use_exponential_sigmas=True) diff --git a/pythonProject/diffusers-main/tests/schedulers/test_scheduler_vq_diffusion.py b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_vq_diffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..c12825ba2e620550910be6d50a273f7043edc7fe --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_scheduler_vq_diffusion.py @@ -0,0 +1,59 @@ +import unittest + +import torch +import torch.nn.functional as F + +from diffusers import VQDiffusionScheduler + +from .test_schedulers import SchedulerCommonTest + + +class VQDiffusionSchedulerTest(SchedulerCommonTest): + scheduler_classes = (VQDiffusionScheduler,) + + def get_scheduler_config(self, **kwargs): + config = { + "num_vec_classes": 4097, + "num_train_timesteps": 100, + } + + config.update(**kwargs) + return config + + def dummy_sample(self, num_vec_classes): + batch_size = 4 + height = 8 + width = 8 + + sample = torch.randint(0, num_vec_classes, (batch_size, height * width)) + + return sample + + @property + def dummy_sample_deter(self): + # not used for VQDiffusionScheduler tests; fail loudly if accessed + assert False + + def dummy_model(self, num_vec_classes): + def model(sample, t, *args): + batch_size, num_latent_pixels = sample.shape + logits = torch.rand((batch_size, num_vec_classes - 1, num_latent_pixels)) + return_value = F.log_softmax(logits.double(), dim=1).float() + return return_value + + return model + + def test_timesteps(self): + for timesteps in [2, 5, 100, 1000]: + 
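# sweep from very short to full-length training schedules +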
self.check_over_configs(num_train_timesteps=timesteps) + + def test_num_vec_classes(self): + for num_vec_classes in [5, 100, 1000, 4000]: + self.check_over_configs(num_vec_classes=num_vec_classes) + + def test_time_indices(self): + for t in [0, 50, 99]: + self.check_over_forward(time_step=t) + + @unittest.skip("Test not supported.") + def test_add_noise_device(self): + pass diff --git a/pythonProject/diffusers-main/tests/schedulers/test_schedulers.py b/pythonProject/diffusers-main/tests/schedulers/test_schedulers.py new file mode 100644 index 0000000000000000000000000000000000000000..5a8380e659fcebcf35283ef0861c0b95e6185271 --- /dev/null +++ b/pythonProject/diffusers-main/tests/schedulers/test_schedulers.py @@ -0,0 +1,868 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import inspect +import json +import os +import tempfile +import unittest +import uuid +from typing import Dict, List, Tuple + +import numpy as np +import torch +from huggingface_hub import delete_repo + +import diffusers +from diffusers import ( + CMStochasticIterativeScheduler, + DDIMScheduler, + DEISMultistepScheduler, + DiffusionPipeline, + EDMEulerScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + IPNDMScheduler, + LMSDiscreteScheduler, + UniPCMultistepScheduler, + VQDiffusionScheduler, +) +from diffusers.configuration_utils import ConfigMixin, register_to_config +from diffusers.schedulers.scheduling_utils import SchedulerMixin +from diffusers.utils import logging + +from ..others.test_utils import TOKEN, USER, is_staging_test +from ..testing_utils import CaptureLogger, torch_device + + +torch.backends.cuda.matmul.allow_tf32 = False + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class SchedulerObject(SchedulerMixin, ConfigMixin): + config_name = "config.json" + + @register_to_config + def __init__( + self, + a=2, + b=5, + c=(2, 5), + d="for diffusion", + e=[1, 3], + ): + pass + + +class SchedulerObject2(SchedulerMixin, ConfigMixin): + config_name = "config.json" + + @register_to_config + def __init__( + self, + a=2, + b=5, + c=(2, 5), + d="for diffusion", + f=[1, 3], + ): + pass + + +class SchedulerObject3(SchedulerMixin, ConfigMixin): + config_name = "config.json" + + @register_to_config + def __init__( + self, + a=2, + b=5, + c=(2, 5), + d="for diffusion", + e=[1, 3], + f=[1, 3], + ): + pass + + +class SchedulerBaseTests(unittest.TestCase): + def test_save_load_from_different_config(self): + obj = SchedulerObject() + + # mock add obj class to `diffusers` + setattr(diffusers, "SchedulerObject", SchedulerObject) + logger = logging.get_logger("diffusers.configuration_utils") + + with tempfile.TemporaryDirectory() as tmpdirname: + obj.save_config(tmpdirname) + with CaptureLogger(logger) as cap_logger_1: + config = SchedulerObject2.load_config(tmpdirname) + new_obj_1 = SchedulerObject2.from_config(config) + + # now save a config parameter that is not expected + with open(os.path.join(tmpdirname, 
SchedulerObject.config_name), "r") as f: + data = json.load(f) + data["unexpected"] = True + + with open(os.path.join(tmpdirname, SchedulerObject.config_name), "w") as f: + json.dump(data, f) + + with CaptureLogger(logger) as cap_logger_2: + config = SchedulerObject.load_config(tmpdirname) + new_obj_2 = SchedulerObject.from_config(config) + + with CaptureLogger(logger) as cap_logger_3: + config = SchedulerObject2.load_config(tmpdirname) + new_obj_3 = SchedulerObject2.from_config(config) + + assert new_obj_1.__class__ == SchedulerObject2 + assert new_obj_2.__class__ == SchedulerObject + assert new_obj_3.__class__ == SchedulerObject2 + + assert cap_logger_1.out == "" + assert ( + cap_logger_2.out + == "The config attributes {'unexpected': True} were passed to SchedulerObject, but are not expected and" + " will" + " be ignored. Please verify your config.json configuration file.\n" + ) + assert cap_logger_2.out.replace("SchedulerObject", "SchedulerObject2") == cap_logger_3.out + + def test_save_load_compatible_schedulers(self): + SchedulerObject2._compatibles = ["SchedulerObject"] + SchedulerObject._compatibles = ["SchedulerObject2"] + + obj = SchedulerObject() + + # mock add obj class to `diffusers` + setattr(diffusers, "SchedulerObject", SchedulerObject) + setattr(diffusers, "SchedulerObject2", SchedulerObject2) + logger = logging.get_logger("diffusers.configuration_utils") + + with tempfile.TemporaryDirectory() as tmpdirname: + obj.save_config(tmpdirname) + + # now save a config parameter that is expected by another class, but not origin class + with open(os.path.join(tmpdirname, SchedulerObject.config_name), "r") as f: + data = json.load(f) + data["f"] = [0, 0] + data["unexpected"] = True + + with open(os.path.join(tmpdirname, SchedulerObject.config_name), "w") as f: + json.dump(data, f) + + with CaptureLogger(logger) as cap_logger: + config = SchedulerObject.load_config(tmpdirname) + new_obj = SchedulerObject.from_config(config) + + assert new_obj.__class__ == SchedulerObject + + assert ( + cap_logger.out + == "The config attributes {'unexpected': True} were passed to SchedulerObject, but are not expected and" + " will" + " be ignored. 
Please verify your config.json configuration file.\n" + ) + + def test_save_load_from_different_config_comp_schedulers(self): + SchedulerObject3._compatibles = ["SchedulerObject", "SchedulerObject2"] + SchedulerObject2._compatibles = ["SchedulerObject", "SchedulerObject3"] + SchedulerObject._compatibles = ["SchedulerObject2", "SchedulerObject3"] + + obj = SchedulerObject() + + # mock add obj class to `diffusers` + setattr(diffusers, "SchedulerObject", SchedulerObject) + setattr(diffusers, "SchedulerObject2", SchedulerObject2) + setattr(diffusers, "SchedulerObject3", SchedulerObject3) + logger = logging.get_logger("diffusers.configuration_utils") + logger.setLevel(diffusers.logging.INFO) + + with tempfile.TemporaryDirectory() as tmpdirname: + obj.save_config(tmpdirname) + + with CaptureLogger(logger) as cap_logger_1: + config = SchedulerObject.load_config(tmpdirname) + new_obj_1 = SchedulerObject.from_config(config) + + with CaptureLogger(logger) as cap_logger_2: + config = SchedulerObject2.load_config(tmpdirname) + new_obj_2 = SchedulerObject2.from_config(config) + + with CaptureLogger(logger) as cap_logger_3: + config = SchedulerObject3.load_config(tmpdirname) + new_obj_3 = SchedulerObject3.from_config(config) + + assert new_obj_1.__class__ == SchedulerObject + assert new_obj_2.__class__ == SchedulerObject2 + assert new_obj_3.__class__ == SchedulerObject3 + + assert cap_logger_1.out == "" + assert cap_logger_2.out == "{'f'} was not found in config. Values will be initialized to default values.\n" + assert cap_logger_3.out == "{'f'} was not found in config. Values will be initialized to default values.\n" + + def test_default_arguments_not_in_config(self): + pipe = DiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-pipe", torch_dtype=torch.float16 + ) + assert pipe.scheduler.__class__ == DDIMScheduler + + # Default for DDIMScheduler + assert pipe.scheduler.config.timestep_spacing == "leading" + + # Switch to a different one, verify we use the default for that class + pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config) + assert pipe.scheduler.config.timestep_spacing == "linspace" + + # Override with kwargs + pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing") + assert pipe.scheduler.config.timestep_spacing == "trailing" + + # Verify overridden kwargs stick + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + assert pipe.scheduler.config.timestep_spacing == "trailing" + + # And stick + pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config) + assert pipe.scheduler.config.timestep_spacing == "trailing" + + def test_default_solver_type_after_switch(self): + pipe = DiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-pipe", torch_dtype=torch.float16 + ) + assert pipe.scheduler.__class__ == DDIMScheduler + + pipe.scheduler = DEISMultistepScheduler.from_config(pipe.scheduler.config) + assert pipe.scheduler.config.solver_type == "logrho" + + # Switch to UniPC, verify the solver is the default + pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config) + assert pipe.scheduler.config.solver_type == "bh2" + + +class SchedulerCommonTest(unittest.TestCase): + scheduler_classes = () + forward_default_kwargs = () + + @property + def default_num_inference_steps(self): + return 50 + + @property + def default_timestep(self): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = 
kwargs.get("num_inference_steps", self.default_num_inference_steps) + + try: + scheduler_config = self.get_scheduler_config() + scheduler = self.scheduler_classes[0](**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + timestep = scheduler.timesteps[0] + except NotImplementedError: + logger.warning( + f"The scheduler {self.__class__.__name__} does not implement a `get_scheduler_config` method." + f" `default_timestep` will be set to the default value of 1." + ) + timestep = 1 + + return timestep + + # NOTE: currently taking the convention that default_timestep > default_timestep_2 (alternatively, + # default_timestep comes earlier in the timestep schedule than default_timestep_2) + @property + def default_timestep_2(self): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.get("num_inference_steps", self.default_num_inference_steps) + + try: + scheduler_config = self.get_scheduler_config() + scheduler = self.scheduler_classes[0](**scheduler_config) + + scheduler.set_timesteps(num_inference_steps) + if len(scheduler.timesteps) >= 2: + timestep_2 = scheduler.timesteps[1] + else: + logger.warning( + f"Using num_inference_steps from the scheduler testing class's default config leads to a timestep" + f" scheduler of length {len(scheduler.timesteps)} < 2. The default `default_timestep_2` value of 0" + f" will be used." + ) + timestep_2 = 0 + except NotImplementedError: + logger.warning( + f"The scheduler {self.__class__.__name__} does not implement a `get_scheduler_config` method." + f" `default_timestep_2` will be set to the default value of 0." + ) + timestep_2 = 0 + + return timestep_2 + + @property + def dummy_sample(self): + batch_size = 4 + num_channels = 3 + height = 8 + width = 8 + + sample = torch.rand((batch_size, num_channels, height, width)) + + return sample + + @property + def dummy_noise_deter(self): + batch_size = 4 + num_channels = 3 + height = 8 + width = 8 + + num_elems = batch_size * num_channels * height * width + sample = torch.arange(num_elems).flip(-1) + sample = sample.reshape(num_channels, height, width, batch_size) + sample = sample / num_elems + sample = sample.permute(3, 0, 1, 2) + + return sample + + @property + def dummy_sample_deter(self): + batch_size = 4 + num_channels = 3 + height = 8 + width = 8 + + num_elems = batch_size * num_channels * height * width + sample = torch.arange(num_elems) + sample = sample.reshape(num_channels, height, width, batch_size) + sample = sample / num_elems + sample = sample.permute(3, 0, 1, 2) + + return sample + + def get_scheduler_config(self): + raise NotImplementedError + + def dummy_model(self): + def model(sample, t, *args): + # if t is a tensor, match the number of dimensions of sample + if isinstance(t, torch.Tensor): + num_dims = len(sample.shape) + # pad t with 1s to match num_dims + t = t.reshape(-1, *(1,) * (num_dims - 1)).to(sample.device, dtype=sample.dtype) + + return sample * t / (t + 1) + + return model + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + time_step = time_step if time_step is not None else self.default_timestep + + for scheduler_class in self.scheduler_classes: + # TODO(Suraj) - delete the following two lines once DDPM, DDIM, and PNDM have timesteps casted to float by default + if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): + time_step = float(time_step) + + scheduler_config = 
self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + if scheduler_class == CMStochasticIterativeScheduler: + # Get valid timestep based on sigma_max, which should always be in timestep schedule. + scaled_sigma_max = scheduler.sigma_to_t(scheduler.config.sigma_max) + time_step = scaled_sigma_max + + if scheduler_class == EDMEulerScheduler: + time_step = scheduler.timesteps[-1] + + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, time_step) + else: + sample = self.dummy_sample + residual = 0.1 * sample + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + new_scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # Make sure `scale_model_input` is invoked to prevent a warning + if scheduler_class == CMStochasticIterativeScheduler: + # Get valid timestep based on sigma_max, which should always be in timestep schedule. + _ = scheduler.scale_model_input(sample, scaled_sigma_max) + _ = new_scheduler.scale_model_input(sample, scaled_sigma_max) + elif scheduler_class != VQDiffusionScheduler: + _ = scheduler.scale_model_input(sample, scheduler.timesteps[-1]) + _ = new_scheduler.scale_model_input(sample, scheduler.timesteps[-1]) + + # Set the seed before step() as some schedulers are stochastic like EulerAncestralDiscreteScheduler, EulerDiscreteScheduler + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = dict(self.forward_default_kwargs) + kwargs.update(forward_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + time_step = time_step if time_step is not None else self.default_timestep + + for scheduler_class in self.scheduler_classes: + if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): + time_step = float(time_step) + + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, time_step) + else: + sample = self.dummy_sample + residual = 0.1 * sample + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + new_scheduler.set_timesteps(num_inference_steps) + elif 
num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def test_from_save_pretrained(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", self.default_num_inference_steps) + + for scheduler_class in self.scheduler_classes: + timestep = self.default_timestep + if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): + timestep = float(timestep) + + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + if scheduler_class == CMStochasticIterativeScheduler: + # Get valid timestep based on sigma_max, which should always be in timestep schedule. + timestep = scheduler.sigma_to_t(scheduler.config.sigma_max) + + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, timestep) + else: + sample = self.dummy_sample + residual = 0.1 * sample + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + new_scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + output = scheduler.step(residual, timestep, sample, **kwargs).prev_sample + + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + new_output = new_scheduler.step(residual, timestep, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def test_compatibles(self): + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + + scheduler = scheduler_class(**scheduler_config) + + assert all(c is not None for c in scheduler.compatibles) + + for comp_scheduler_cls in scheduler.compatibles: + comp_scheduler = comp_scheduler_cls.from_config(scheduler.config) + assert comp_scheduler is not None + + new_scheduler = scheduler_class.from_config(comp_scheduler.config) + + new_scheduler_config = {k: v for k, v in new_scheduler.config.items() if k in scheduler.config} + scheduler_diff = {k: v for k, v in new_scheduler.config.items() if k not in scheduler.config} + + # make sure that configs are essentially identical + assert new_scheduler_config == dict(scheduler.config) + + # make sure that only differences are for configs that are not in init + init_keys = inspect.signature(scheduler_class.__init__).parameters.keys() + 
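# keys that appear only in a compatible scheduler's config must not be __init__ arguments of this class +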
assert set(scheduler_diff.keys()).intersection(set(init_keys)) == set() + + def test_from_pretrained(self): + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + + scheduler = scheduler_class(**scheduler_config) + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_pretrained(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + + # `_use_default_values` should not exist for just saved & loaded scheduler + scheduler_config = dict(scheduler.config) + del scheduler_config["_use_default_values"] + + assert scheduler_config == new_scheduler.config + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", self.default_num_inference_steps) + + timestep_0 = self.default_timestep + timestep_1 = self.default_timestep_2 + + for scheduler_class in self.scheduler_classes: + if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): + timestep_0 = float(timestep_0) + timestep_1 = float(timestep_1) + + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, timestep_0) + else: + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + output_0 = scheduler.step(residual, timestep_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, timestep_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_scheduler_outputs_equivalence(self): + def set_nan_tensor_to_zero(t): + t[t != t] = 0 + return t + + def recursive_check(tuple_object, dict_object): + if isinstance(tuple_object, (List, Tuple)): + for tuple_iterable_value, dict_iterable_value in zip(tuple_object, dict_object.values()): + recursive_check(tuple_iterable_value, dict_iterable_value) + elif isinstance(tuple_object, Dict): + for tuple_iterable_value, dict_iterable_value in zip(tuple_object.values(), dict_object.values()): + recursive_check(tuple_iterable_value, dict_iterable_value) + elif tuple_object is None: + return + else: + self.assertTrue( + torch.allclose( + set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5 + ), + msg=( + "Tuple and dict output are not equal. Difference:" + f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:" + f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object).any()}. Dict has" + f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object).any()}." 
+ ), + ) + + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", self.default_num_inference_steps) + + timestep = self.default_timestep + if len(self.scheduler_classes) > 0 and self.scheduler_classes[0] == IPNDMScheduler: + timestep = 1 + + for scheduler_class in self.scheduler_classes: + if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): + timestep = float(timestep) + + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + if scheduler_class == CMStochasticIterativeScheduler: + # Get valid timestep based on sigma_max, which should always be in timestep schedule. + timestep = scheduler.sigma_to_t(scheduler.config.sigma_max) + + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, timestep) + else: + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # Set the seed before step() as some schedulers are stochastic like EulerAncestralDiscreteScheduler, EulerDiscreteScheduler + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + outputs_dict = scheduler.step(residual, timestep, sample, **kwargs) + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # Set the seed before step() as some schedulers are stochastic like EulerAncestralDiscreteScheduler, EulerDiscreteScheduler + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.manual_seed(0) + outputs_tuple = scheduler.step(residual, timestep, sample, return_dict=False, **kwargs) + + recursive_check(outputs_tuple, outputs_dict) + + def test_scheduler_public_api(self): + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + if scheduler_class != VQDiffusionScheduler: + self.assertTrue( + hasattr(scheduler, "init_noise_sigma"), + f"{scheduler_class} does not implement a required attribute `init_noise_sigma`", + ) + self.assertTrue( + hasattr(scheduler, "scale_model_input"), + ( + f"{scheduler_class} does not implement a required class method `scale_model_input(sample," + " timestep)`" + ), + ) + self.assertTrue( + hasattr(scheduler, "step"), + f"{scheduler_class} does not implement a required class method `step(...)`", + ) + + if scheduler_class != VQDiffusionScheduler: + sample = self.dummy_sample + if scheduler_class == CMStochasticIterativeScheduler: + # Get valid timestep based on sigma_max, which should always be in timestep schedule. 
+ scaled_sigma_max = scheduler.sigma_to_t(scheduler.config.sigma_max) + scaled_sample = scheduler.scale_model_input(sample, scaled_sigma_max) + elif scheduler_class == EDMEulerScheduler: + scaled_sample = scheduler.scale_model_input(sample, scheduler.timesteps[-1]) + else: + scaled_sample = scheduler.scale_model_input(sample, 0.0) + self.assertEqual(sample.shape, scaled_sample.shape) + + def test_add_noise_device(self): + for scheduler_class in self.scheduler_classes: + if scheduler_class == IPNDMScheduler: + continue + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(self.default_num_inference_steps) + + sample = self.dummy_sample.to(torch_device) + if scheduler_class == CMStochasticIterativeScheduler: + # Get valid timestep based on sigma_max, which should always be in timestep schedule. + scaled_sigma_max = scheduler.sigma_to_t(scheduler.config.sigma_max) + scaled_sample = scheduler.scale_model_input(sample, scaled_sigma_max) + elif scheduler_class == EDMEulerScheduler: + scaled_sample = scheduler.scale_model_input(sample, scheduler.timesteps[-1]) + else: + scaled_sample = scheduler.scale_model_input(sample, 0.0) + self.assertEqual(sample.shape, scaled_sample.shape) + + noise = torch.randn(scaled_sample.shape).to(torch_device) + t = scheduler.timesteps[5][None] + noised = scheduler.add_noise(scaled_sample, noise, t) + self.assertEqual(noised.shape, scaled_sample.shape) + + def test_deprecated_kwargs(self): + for scheduler_class in self.scheduler_classes: + has_kwarg_in_model_class = "kwargs" in inspect.signature(scheduler_class.__init__).parameters + has_deprecated_kwarg = len(scheduler_class._deprecated_kwargs) > 0 + + if has_kwarg_in_model_class and not has_deprecated_kwarg: + raise ValueError( + f"{scheduler_class} has `**kwargs` in its __init__ method but has not defined any deprecated" + " kwargs under the `_deprecated_kwargs` class attribute. Make sure to either remove `**kwargs` if" + " there are no deprecated arguments or add the deprecated argument with `_deprecated_kwargs =" + " []`" + ) + + if not has_kwarg_in_model_class and has_deprecated_kwarg: + raise ValueError( + f"{scheduler_class} doesn't have `**kwargs` in its __init__ method but has defined deprecated" + " kwargs under the `_deprecated_kwargs` class attribute. 
Make sure to either add the `**kwargs`" + f" argument to {scheduler_class}.__init__ if there are deprecated arguments or remove the" + " deprecated argument from `_deprecated_kwargs = []`" + ) + + def test_trained_betas(self): + for scheduler_class in self.scheduler_classes: + if scheduler_class in (VQDiffusionScheduler, CMStochasticIterativeScheduler): + continue + + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config, trained_betas=np.array([0.1, 0.3])) + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_pretrained(tmpdirname) + new_scheduler = scheduler_class.from_pretrained(tmpdirname) + + assert scheduler.betas.tolist() == new_scheduler.betas.tolist() + + def test_getattr_is_correct(self): + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + # save some things to test + scheduler.dummy_attribute = 5 + scheduler.register_to_config(test_attribute=5) + + logger = logging.get_logger("diffusers.configuration_utils") + # 30 for warning + logger.setLevel(30) + with CaptureLogger(logger) as cap_logger: + assert hasattr(scheduler, "dummy_attribute") + assert getattr(scheduler, "dummy_attribute") == 5 + assert scheduler.dummy_attribute == 5 + + # no warning should be thrown + assert cap_logger.out == "" + + logger = logging.get_logger("diffusers.schedulers.scheduling_utils") + # 30 for warning + logger.setLevel(30) + with CaptureLogger(logger) as cap_logger: + assert hasattr(scheduler, "save_pretrained") + fn = scheduler.save_pretrained + fn_1 = getattr(scheduler, "save_pretrained") + + assert fn == fn_1 + # no warning should be thrown + assert cap_logger.out == "" + + # warning should be thrown + with self.assertWarns(FutureWarning): + assert scheduler.test_attribute == 5 + + with self.assertWarns(FutureWarning): + assert getattr(scheduler, "test_attribute") == 5 + + with self.assertRaises(AttributeError) as error: + scheduler.does_not_exist + + assert str(error.exception) == f"'{type(scheduler).__name__}' object has no attribute 'does_not_exist'" + + +@is_staging_test +class SchedulerPushToHubTester(unittest.TestCase): + identifier = uuid.uuid4() + repo_id = f"test-scheduler-{identifier}" + org_repo_id = f"valid_org/{repo_id}-org" + + def test_push_to_hub(self): + scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + ) + scheduler.push_to_hub(self.repo_id, token=TOKEN) + scheduler_loaded = DDIMScheduler.from_pretrained(f"{USER}/{self.repo_id}") + + assert type(scheduler) == type(scheduler_loaded) + + # Reset repo + delete_repo(token=TOKEN, repo_id=self.repo_id) + + # Push to hub via save_config + with tempfile.TemporaryDirectory() as tmp_dir: + scheduler.save_config(tmp_dir, repo_id=self.repo_id, push_to_hub=True, token=TOKEN) + + scheduler_loaded = DDIMScheduler.from_pretrained(f"{USER}/{self.repo_id}") + + assert type(scheduler) == type(scheduler_loaded) + + # Reset repo + delete_repo(token=TOKEN, repo_id=self.repo_id) + + def test_push_to_hub_in_organization(self): + scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + ) + scheduler.push_to_hub(self.org_repo_id, token=TOKEN) + scheduler_loaded = DDIMScheduler.from_pretrained(self.org_repo_id) + + assert type(scheduler) == type(scheduler_loaded) + + # Reset repo + delete_repo(token=TOKEN, 
repo_id=self.org_repo_id) + + # Push to hub via save_config + with tempfile.TemporaryDirectory() as tmp_dir: + scheduler.save_config(tmp_dir, repo_id=self.org_repo_id, push_to_hub=True, token=TOKEN) + + scheduler_loaded = DDIMScheduler.from_pretrained(self.org_repo_id) + + assert type(scheduler) == type(scheduler_loaded) + + # Reset repo + delete_repo(token=TOKEN, repo_id=self.org_repo_id) diff --git a/pythonProject/diffusers-main/tests/single_file/__init__.py b/pythonProject/diffusers-main/tests/single_file/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonProject/diffusers-main/tests/single_file/single_file_testing_utils.py b/pythonProject/diffusers-main/tests/single_file/single_file_testing_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3510d3371ca5e5bbb5acd9e3dd3e2e4e4f83255a --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/single_file_testing_utils.py @@ -0,0 +1,412 @@ +import tempfile +from io import BytesIO + +import requests +import torch +from huggingface_hub import hf_hub_download, snapshot_download + +from diffusers.loaders.single_file_utils import _extract_repo_id_and_weights_name +from diffusers.models.attention_processor import AttnProcessor + +from ..testing_utils import ( + numpy_cosine_similarity_distance, + torch_device, +) + + +def download_single_file_checkpoint(repo_id, filename, tmpdir): + path = hf_hub_download(repo_id, filename=filename, local_dir=tmpdir) + return path + + +def download_original_config(config_url, tmpdir): + original_config_file = BytesIO(requests.get(config_url).content) + path = f"{tmpdir}/config.yaml" + with open(path, "wb") as f: + f.write(original_config_file.read()) + + return path + + +def download_diffusers_config(repo_id, tmpdir): + path = snapshot_download( + repo_id, + ignore_patterns=[ + "**/*.ckpt", + "*.ckpt", + "**/*.bin", + "*.bin", + "**/*.pt", + "*.pt", + "**/*.safetensors", + "*.safetensors", + ], + allow_patterns=["**/*.json", "*.json", "*.txt", "**/*.txt"], + local_dir=tmpdir, + ) + return path + + +class SDSingleFileTesterMixin: + single_file_kwargs = {} + + def _compare_component_configs(self, pipe, single_file_pipe): + for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items(): + if param_name in ["torch_dtype", "architectures", "_name_or_path"]: + continue + assert pipe.text_encoder.config.to_dict()[param_name] == param_value + + PARAMS_TO_IGNORE = [ + "torch_dtype", + "_name_or_path", + "architectures", + "_use_default_values", + "_diffusers_version", + ] + for component_name, component in single_file_pipe.components.items(): + if component_name in single_file_pipe._optional_components: + continue + + # skip testing transformer based components here + # skip text encoders / safety checkers since they have already been tested + if component_name in ["text_encoder", "tokenizer", "safety_checker", "feature_extractor"]: + continue + + assert component_name in pipe.components, f"single file {component_name} not found in pretrained pipeline" + assert isinstance(component, pipe.components[component_name].__class__), ( + f"single file {component.__class__.__name__} and pretrained {pipe.components[component_name].__class__.__name__} are not the same" + ) + + for param_name, param_value in component.config.items(): + if param_name in PARAMS_TO_IGNORE: + continue + + # Some pretrained configs will set upcast attention to None + # In single file loading it defaults to the 
value in the class __init__ which is False + if param_name == "upcast_attention" and pipe.components[component_name].config[param_name] is None: + pipe.components[component_name].config[param_name] = param_value + + assert pipe.components[component_name].config[param_name] == param_value, ( + f"single file {param_name}: {param_value} differs from pretrained {pipe.components[component_name].config[param_name]}" + ) + + def test_single_file_components(self, pipe=None, single_file_pipe=None): + single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file( + self.ckpt_path, safety_checker=None + ) + pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None) + + self._compare_component_configs(pipe, single_file_pipe) + + def test_single_file_components_local_files_only(self, pipe=None, single_file_pipe=None): + pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + + single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file( + local_ckpt_path, safety_checker=None, local_files_only=True + ) + + self._compare_component_configs(pipe, single_file_pipe) + + def test_single_file_components_with_original_config( + self, + pipe=None, + single_file_pipe=None, + ): + pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None) + # Not possible to infer this value when original config is provided + # we just pass it in here otherwise this test will fail + upcast_attention = pipe.unet.config.upcast_attention + + single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file( + self.ckpt_path, + original_config=self.original_config, + safety_checker=None, + upcast_attention=upcast_attention, + ) + + self._compare_component_configs(pipe, single_file_pipe) + + def test_single_file_components_with_original_config_local_files_only( + self, + pipe=None, + single_file_pipe=None, + ): + pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None) + + # Not possible to infer this value when original config is provided + # we just pass it in here otherwise this test will fail + upcast_attention = pipe.unet.config.upcast_attention + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + local_original_config = download_original_config(self.original_config, tmpdir) + + single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file( + local_ckpt_path, + original_config=local_original_config, + safety_checker=None, + upcast_attention=upcast_attention, + local_files_only=True, + ) + + self._compare_component_configs(pipe, single_file_pipe) + + def test_single_file_format_inference_is_same_as_pretrained(self, expected_max_diff=1e-4): + sf_pipe = self.pipeline_class.from_single_file(self.ckpt_path, safety_checker=None, **self.single_file_kwargs) + sf_pipe.unet.set_attn_processor(AttnProcessor()) + sf_pipe.enable_model_cpu_offload(device=torch_device) + + inputs = self.get_inputs(torch_device) + image_single_file = sf_pipe(**inputs).images[0] + + pipe = self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None) + pipe.unet.set_attn_processor(AttnProcessor()) + 
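# both pipelines use the default AttnProcessor so the attention implementation is identical for the numerical comparison +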
+        pipe.unet.set_attn_processor(AttnProcessor())
+        pipe.enable_model_cpu_offload(device=torch_device)
+
+        inputs = self.get_inputs(torch_device)
+        image = pipe(**inputs).images[0]
+
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), image_single_file.flatten())
+
+        assert max_diff < expected_max_diff, f"{image.flatten()} != {image_single_file.flatten()}"
+
+    def test_single_file_components_with_diffusers_config(
+        self,
+        pipe=None,
+        single_file_pipe=None,
+    ):
+        single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+            self.ckpt_path, config=self.repo_id, safety_checker=None
+        )
+        pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None)
+
+        self._compare_component_configs(pipe, single_file_pipe)
+
+    def test_single_file_components_with_diffusers_config_local_files_only(
+        self,
+        pipe=None,
+        single_file_pipe=None,
+    ):
+        pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path)
+            local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir)
+            local_diffusers_config = download_diffusers_config(self.repo_id, tmpdir)
+
+            single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+                local_ckpt_path, config=local_diffusers_config, safety_checker=None, local_files_only=True
+            )
+
+        self._compare_component_configs(pipe, single_file_pipe)
+
+    def test_single_file_setting_pipeline_dtype_to_fp16(
+        self,
+        single_file_pipe=None,
+    ):
+        single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+            self.ckpt_path, torch_dtype=torch.float16
+        )
+
+        for component_name, component in single_file_pipe.components.items():
+            if not isinstance(component, torch.nn.Module):
+                continue
+
+            assert component.dtype == torch.float16
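+
+
+# A minimal sketch of how a concrete test case wires up SDSingleFileTesterMixin;
+# the checkpoint URL and repo id below are placeholders, not real test targets:
+#
+#     class MyPipelineSingleFileTests(unittest.TestCase, SDSingleFileTesterMixin):
+#         pipeline_class = StableDiffusionPipeline
+#         ckpt_path = "https://huggingface.co/<repo>/blob/main/<weights>.safetensors"
+#         original_config = "https://raw.githubusercontent.com/<...>/v1-inference.yaml"
+#         repo_id = "<diffusers-format-repo-id>"
+#
+#         def get_inputs(self, device, seed=0):
+#             ...  # return the kwargs the pipeline call expects
+
+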
+class SDXLSingleFileTesterMixin:
+    def _compare_component_configs(self, pipe, single_file_pipe):
+        # Skip testing the text_encoder for Refiner Pipelines
+        if pipe.text_encoder:
+            for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
+                if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                    continue
+                assert pipe.text_encoder.config.to_dict()[param_name] == param_value
+
+        for param_name, param_value in single_file_pipe.text_encoder_2.config.to_dict().items():
+            if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                continue
+            assert pipe.text_encoder_2.config.to_dict()[param_name] == param_value
+
+        PARAMS_TO_IGNORE = [
+            "torch_dtype",
+            "_name_or_path",
+            "architectures",
+            "_use_default_values",
+            "_diffusers_version",
+        ]
+        for component_name, component in single_file_pipe.components.items():
+            if component_name in single_file_pipe._optional_components:
+                continue
+
+            # skip text encoders since they have already been tested
+            if component_name in ["text_encoder", "text_encoder_2", "tokenizer", "tokenizer_2"]:
+                continue
+
+            # skip safety checker if it is not present in the pipeline
+            if component_name in ["safety_checker", "feature_extractor"]:
+                continue
+
+            assert component_name in pipe.components, f"single file {component_name} not found in pretrained pipeline"
+            assert isinstance(component, pipe.components[component_name].__class__), (
+                f"single file {component.__class__.__name__} and pretrained {pipe.components[component_name].__class__.__name__} are not the same"
+            )
+
+            for param_name, param_value in component.config.items():
+                if param_name in PARAMS_TO_IGNORE:
+                    continue
+
+                # Some pretrained configs will set upcast attention to None.
+                # In single file loading it defaults to the value in the class __init__, which is False.
+                if param_name == "upcast_attention" and pipe.components[component_name].config[param_name] is None:
+                    pipe.components[component_name].config[param_name] = param_value
+
+                assert pipe.components[component_name].config[param_name] == param_value, (
+                    f"single file {param_name}: {param_value} differs from pretrained {pipe.components[component_name].config[param_name]}"
+                )
+
+    def test_single_file_components(self, pipe=None, single_file_pipe=None):
+        single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+            self.ckpt_path, safety_checker=None
+        )
+        pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None)
+
+        self._compare_component_configs(
+            pipe,
+            single_file_pipe,
+        )
+
+    def test_single_file_components_local_files_only(
+        self,
+        pipe=None,
+        single_file_pipe=None,
+    ):
+        pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path)
+            local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir)
+
+            single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+                local_ckpt_path, safety_checker=None, local_files_only=True
+            )
+
+        self._compare_component_configs(pipe, single_file_pipe)
+
+    def test_single_file_components_with_original_config(
+        self,
+        pipe=None,
+        single_file_pipe=None,
+    ):
+        pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None)
+        # Not possible to infer this value when original config is provided
+        # we just pass it in here otherwise this test will fail
+        upcast_attention = pipe.unet.config.upcast_attention
+        single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+            self.ckpt_path,
+            original_config=self.original_config,
+            safety_checker=None,
+            upcast_attention=upcast_attention,
+        )
+
+        self._compare_component_configs(
+            pipe,
+            single_file_pipe,
+        )
+
+    def test_single_file_components_with_original_config_local_files_only(
+        self,
+        pipe=None,
+        single_file_pipe=None,
+    ):
+        pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None)
+        # Not possible to infer this value when original config is provided
+        # we just pass it in here otherwise this test will fail
+        upcast_attention = pipe.unet.config.upcast_attention
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path)
+            local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir)
+            local_original_config = download_original_config(self.original_config, tmpdir)
+
+            single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+                local_ckpt_path,
+                original_config=local_original_config,
+                upcast_attention=upcast_attention,
+                safety_checker=None,
+                local_files_only=True,
+            )
+
+        self._compare_component_configs(
+            pipe,
+            single_file_pipe,
+        )
+
+    def test_single_file_components_with_diffusers_config(
+        self,
+        pipe=None,
+        single_file_pipe=None,
+    ):
+        single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+            self.ckpt_path, config=self.repo_id, safety_checker=None
+        )
+        pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None)
+
+        self._compare_component_configs(pipe, single_file_pipe)
+
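+    # Stages the checkpoint and the diffusers-format config in a temporary
+    # directory first, then loads with local_files_only=True so the load itself
+    # cannot reach the hub.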
+    def test_single_file_components_with_diffusers_config_local_files_only(
+        self,
+        pipe=None,
+        single_file_pipe=None,
+    ):
+        pipe = pipe or self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path)
+            local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir)
+            local_diffusers_config = download_diffusers_config(self.repo_id, tmpdir)
+
+            single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+                local_ckpt_path, config=local_diffusers_config, safety_checker=None, local_files_only=True
+            )
+
+        self._compare_component_configs(pipe, single_file_pipe)
+
+    def test_single_file_format_inference_is_same_as_pretrained(self, expected_max_diff=1e-4):
+        sf_pipe = self.pipeline_class.from_single_file(self.ckpt_path, torch_dtype=torch.float16, safety_checker=None)
+        sf_pipe.unet.set_default_attn_processor()
+        sf_pipe.enable_model_cpu_offload(device=torch_device)
+
+        inputs = self.get_inputs(torch_device)
+        image_single_file = sf_pipe(**inputs).images[0]
+
+        pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16, safety_checker=None)
+        pipe.unet.set_default_attn_processor()
+        pipe.enable_model_cpu_offload(device=torch_device)
+
+        inputs = self.get_inputs(torch_device)
+        image = pipe(**inputs).images[0]
+
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), image_single_file.flatten())
+
+        assert max_diff < expected_max_diff
+
+    def test_single_file_setting_pipeline_dtype_to_fp16(
+        self,
+        single_file_pipe=None,
+    ):
+        single_file_pipe = single_file_pipe or self.pipeline_class.from_single_file(
+            self.ckpt_path, torch_dtype=torch.float16
+        )
+
+        for component_name, component in single_file_pipe.components.items():
+            if not isinstance(component, torch.nn.Module):
+                continue
+
+            assert component.dtype == torch.float16
diff --git a/pythonProject/diffusers-main/tests/single_file/test_lumina2_transformer.py b/pythonProject/diffusers-main/tests/single_file/test_lumina2_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..99d9b71395c62d9a8f094f954424a7537a549199
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_lumina2_transformer.py
@@ -0,0 +1,73 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import unittest
+
+from diffusers import (
+    Lumina2Transformer2DModel,
+)
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    require_torch_accelerator,
+    torch_device,
+)
+
+
+enable_full_determinism()
+
+
+@require_torch_accelerator
+class Lumina2Transformer2DModelSingleFileTests(unittest.TestCase):
+    model_class = Lumina2Transformer2DModel
+    ckpt_path = "https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/blob/main/split_files/diffusion_models/lumina_2_model_bf16.safetensors"
+    alternate_keys_ckpt_paths = [
+        "https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/blob/main/split_files/diffusion_models/lumina_2_model_bf16.safetensors"
+    ]
+
+    repo_id = "Alpha-VLLM/Lumina-Image-2.0"
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_single_file_components(self):
+        model = self.model_class.from_pretrained(self.repo_id, subfolder="transformer")
+        model_single_file = self.model_class.from_single_file(self.ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
+
+    def test_checkpoint_loading(self):
+        for ckpt_path in self.alternate_keys_ckpt_paths:
+            backend_empty_cache(torch_device)
+            model = self.model_class.from_single_file(ckpt_path)
+
+            del model
+            gc.collect()
+            backend_empty_cache(torch_device)
diff --git a/pythonProject/diffusers-main/tests/single_file/test_model_autoencoder_dc_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_model_autoencoder_dc_single_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..5195f8e52f8d31023803864869567c8867f9cc0e
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_model_autoencoder_dc_single_file.py
@@ -0,0 +1,127 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import unittest
+
+import torch
+
+from diffusers import (
+    AutoencoderDC,
+)
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    load_hf_numpy,
+    numpy_cosine_similarity_distance,
+    require_torch_accelerator,
+    slow,
+    torch_device,
+)
+
+
+enable_full_determinism()
+
+
+@slow
+@require_torch_accelerator
+class AutoencoderDCSingleFileTests(unittest.TestCase):
+    model_class = AutoencoderDC
+    ckpt_path = "https://huggingface.co/mit-han-lab/dc-ae-f32c32-sana-1.0/blob/main/model.safetensors"
+    repo_id = "mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers"
+    main_input_name = "sample"
+    base_precision = 1e-2
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def get_file_format(self, seed, shape):
+        return f"gaussian_noise_s={seed}_shape={'_'.join([str(s) for s in shape])}.npy"
+
+    def get_sd_image(self, seed=0, shape=(4, 3, 512, 512), fp16=False):
+        dtype = torch.float16 if fp16 else torch.float32
+        image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
+        return image
+
+    def test_single_file_inference_same_as_pretrained(self):
+        model_1 = self.model_class.from_pretrained(self.repo_id).to(torch_device)
+        model_2 = self.model_class.from_single_file(self.ckpt_path, config=self.repo_id).to(torch_device)
+
+        image = self.get_sd_image(33)
+
+        with torch.no_grad():
+            sample_1 = model_1(image).sample
+            sample_2 = model_2(image).sample
+
+        assert sample_1.shape == sample_2.shape
+
+        output_slice_1 = sample_1.flatten().float().cpu()
+        output_slice_2 = sample_2.flatten().float().cpu()
+
+        assert numpy_cosine_similarity_distance(output_slice_1, output_slice_2) < 1e-4
+
+    def test_single_file_components(self):
+        model = self.model_class.from_pretrained(self.repo_id)
+        model_single_file = self.model_class.from_single_file(self.ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between pretrained loading and single file loading"
+            )
+
+    def test_single_file_in_type_variant_components(self):
+        # `in` variant checkpoints require passing in a `config` parameter
+        # in order to set the scaling factor correctly.
+        # `in` and `mix` variants have the same keys and we cannot automatically infer a scaling factor.
+        # We default to using the `mix` config,
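+        # so an `in` checkpoint loaded without an explicit `config` would pick up
+        # the `mix` scaling factor; pinning `config=repo_id` below avoids that.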
+        repo_id = "mit-han-lab/dc-ae-f128c512-in-1.0-diffusers"
+        ckpt_path = "https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0/blob/main/model.safetensors"
+
+        model = self.model_class.from_pretrained(repo_id)
+        model_single_file = self.model_class.from_single_file(ckpt_path, config=repo_id)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between pretrained loading and single file loading"
+            )
+
+    def test_single_file_mix_type_variant_components(self):
+        repo_id = "mit-han-lab/dc-ae-f128c512-mix-1.0-diffusers"
+        ckpt_path = "https://huggingface.co/mit-han-lab/dc-ae-f128c512-mix-1.0/blob/main/model.safetensors"
+
+        model = self.model_class.from_pretrained(repo_id)
+        model_single_file = self.model_class.from_single_file(ckpt_path, config=repo_id)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between pretrained loading and single file loading"
+            )
diff --git a/pythonProject/diffusers-main/tests/single_file/test_model_controlnet_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_model_controlnet_single_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5214fe3f209bc2bcd371e0000e1bce0c28de4d0
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_model_controlnet_single_file.py
@@ -0,0 +1,81 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import unittest
+
+import torch
+
+from diffusers import (
+    ControlNetModel,
+)
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    require_torch_accelerator,
+    slow,
+    torch_device,
+)
+
+
+enable_full_determinism()
+
+
+@slow
+@require_torch_accelerator
+class ControlNetModelSingleFileTests(unittest.TestCase):
+    model_class = ControlNetModel
+    ckpt_path = "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth"
+    repo_id = "lllyasviel/control_v11p_sd15_canny"
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_single_file_components(self):
+        model = self.model_class.from_pretrained(self.repo_id)
+        model_single_file = self.model_class.from_single_file(self.ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
+
+    def test_single_file_arguments(self):
+        model_default = self.model_class.from_single_file(self.ckpt_path)
+
+        assert model_default.config.upcast_attention is False
+        assert model_default.dtype == torch.float32
+
+        torch_dtype = torch.float16
+        upcast_attention = True
+
+        model = self.model_class.from_single_file(
+            self.ckpt_path,
+            upcast_attention=upcast_attention,
+            torch_dtype=torch_dtype,
+        )
+        assert model.config.upcast_attention == upcast_attention
+        assert model.dtype == torch_dtype
diff --git a/pythonProject/diffusers-main/tests/single_file/test_model_flux_transformer_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_model_flux_transformer_single_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..8290c339b931a19c770b30de23502a662d4ecffd
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_model_flux_transformer_single_file.py
@@ -0,0 +1,79 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import unittest
+
+from diffusers import (
+    FluxTransformer2DModel,
+)
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    require_torch_accelerator,
+    torch_device,
+)
+
+
+enable_full_determinism()
+
+
+@require_torch_accelerator
+class FluxTransformer2DModelSingleFileTests(unittest.TestCase):
+    model_class = FluxTransformer2DModel
+    ckpt_path = "https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors"
+    alternate_keys_ckpt_paths = ["https://huggingface.co/Comfy-Org/flux1-dev/blob/main/flux1-dev-fp8.safetensors"]
+
+    repo_id = "black-forest-labs/FLUX.1-dev"
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_single_file_components(self):
+        model = self.model_class.from_pretrained(self.repo_id, subfolder="transformer")
+        model_single_file = self.model_class.from_single_file(self.ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
+
+    def test_checkpoint_loading(self):
+        for ckpt_path in self.alternate_keys_ckpt_paths:
+            backend_empty_cache(torch_device)
+            model = self.model_class.from_single_file(ckpt_path)
+
+            del model
+            gc.collect()
+            backend_empty_cache(torch_device)
+
+    def test_device_map_cuda(self):
+        backend_empty_cache(torch_device)
+        model = self.model_class.from_single_file(self.ckpt_path, device_map="cuda")
+
+        del model
+        gc.collect()
+        backend_empty_cache(torch_device)
diff --git a/pythonProject/diffusers-main/tests/single_file/test_model_motion_adapter_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_model_motion_adapter_single_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..7aaf4b577e4b63a163d3d67aa7b9afb998186d7b
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_model_motion_adapter_single_file.py
@@ -0,0 +1,91 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from diffusers import (
+    MotionAdapter,
+)
+
+from ..testing_utils import (
+    enable_full_determinism,
+)
+
+
+enable_full_determinism()
+
+
+class MotionAdapterSingleFileTests(unittest.TestCase):
+    model_class = MotionAdapter
+
+    def test_single_file_components_version_v1_5(self):
+        ckpt_path = "https://huggingface.co/guoyww/animatediff/blob/main/mm_sd_v15.ckpt"
+        repo_id = "guoyww/animatediff-motion-adapter-v1-5"
+
+        model = self.model_class.from_pretrained(repo_id)
+        model_single_file = self.model_class.from_single_file(ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between pretrained loading and single file loading"
+            )
+
+    def test_single_file_components_version_v1_5_2(self):
+        ckpt_path = "https://huggingface.co/guoyww/animatediff/blob/main/mm_sd_v15_v2.ckpt"
+        repo_id = "guoyww/animatediff-motion-adapter-v1-5-2"
+
+        model = self.model_class.from_pretrained(repo_id)
+        model_single_file = self.model_class.from_single_file(ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between pretrained loading and single file loading"
+            )
+
+    def test_single_file_components_version_v1_5_3(self):
+        ckpt_path = "https://huggingface.co/guoyww/animatediff/blob/main/v3_sd15_mm.ckpt"
+        repo_id = "guoyww/animatediff-motion-adapter-v1-5-3"
+
+        model = self.model_class.from_pretrained(repo_id)
+        model_single_file = self.model_class.from_single_file(ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between pretrained loading and single file loading"
+            )
+
+    def test_single_file_components_version_sdxl_beta(self):
+        ckpt_path = "https://huggingface.co/guoyww/animatediff/blob/main/mm_sdxl_v10_beta.ckpt"
+        repo_id = "guoyww/animatediff-motion-adapter-sdxl-beta"
+
+        model = self.model_class.from_pretrained(repo_id)
+        model_single_file = self.model_class.from_single_file(ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between pretrained loading and single file loading"
+            )
diff --git a/pythonProject/diffusers-main/tests/single_file/test_model_sd_cascade_unet_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_model_sd_cascade_unet_single_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5ec9dba30df723fab174ed7807a8143b51a8743
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_model_sd_cascade_unet_single_file.py
@@ -0,0 +1,117 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import unittest
+
+import torch
+
+from diffusers import StableCascadeUNet
+from diffusers.utils import logging
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    require_torch_accelerator,
+    slow,
+    torch_device,
+)
+
+
+logger = logging.get_logger(__name__)
+
+enable_full_determinism()
+
+
+@slow
+@require_torch_accelerator
+class StableCascadeUNetSingleFileTest(unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_single_file_components_stage_b(self):
+        model_single_file = StableCascadeUNet.from_single_file(
+            "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_b_bf16.safetensors",
+            torch_dtype=torch.bfloat16,
+        )
+        model = StableCascadeUNet.from_pretrained(
+            "stabilityai/stable-cascade", variant="bf16", subfolder="decoder", use_safetensors=True
+        )
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
+
+    def test_single_file_components_stage_b_lite(self):
+        model_single_file = StableCascadeUNet.from_single_file(
+            "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_b_lite_bf16.safetensors",
+            torch_dtype=torch.bfloat16,
+        )
+        model = StableCascadeUNet.from_pretrained(
+            "stabilityai/stable-cascade", variant="bf16", subfolder="decoder_lite"
+        )
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
+
+    def test_single_file_components_stage_c(self):
+        model_single_file = StableCascadeUNet.from_single_file(
+            "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors",
+            torch_dtype=torch.bfloat16,
+        )
+        model = StableCascadeUNet.from_pretrained(
+            "stabilityai/stable-cascade-prior", variant="bf16", subfolder="prior"
+        )
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
+
+    def test_single_file_components_stage_c_lite(self):
+        model_single_file = StableCascadeUNet.from_single_file(
+            "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_lite_bf16.safetensors",
+            torch_dtype=torch.bfloat16,
+        )
+        model = StableCascadeUNet.from_pretrained(
+            "stabilityai/stable-cascade-prior", variant="bf16", subfolder="prior_lite"
+        )
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
diff --git a/pythonProject/diffusers-main/tests/single_file/test_model_vae_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_model_vae_single_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b9e619f13e6e9ef74b3693fd50c428c9e895f21
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_model_vae_single_file.py
@@ -0,0 +1,119 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import unittest
+
+import torch
+
+from diffusers import (
+    AutoencoderKL,
+)
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    load_hf_numpy,
+    numpy_cosine_similarity_distance,
+    require_torch_accelerator,
+    slow,
+    torch_device,
+)
+
+
+enable_full_determinism()
+
+
+@slow
+@require_torch_accelerator
+class AutoencoderKLSingleFileTests(unittest.TestCase):
+    model_class = AutoencoderKL
+    ckpt_path = (
+        "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors"
+    )
+    repo_id = "stabilityai/sd-vae-ft-mse"
+    main_input_name = "sample"
+    base_precision = 1e-2
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def get_file_format(self, seed, shape):
+        return f"gaussian_noise_s={seed}_shape={'_'.join([str(s) for s in shape])}.npy"
+
+    def get_sd_image(self, seed=0, shape=(4, 3, 512, 512), fp16=False):
+        dtype = torch.float16 if fp16 else torch.float32
+        image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
+        return image
+
+    def test_single_file_inference_same_as_pretrained(self):
+        model_1 = self.model_class.from_pretrained(self.repo_id).to(torch_device)
+        model_2 = self.model_class.from_single_file(self.ckpt_path, config=self.repo_id).to(torch_device)
+
+        image = self.get_sd_image(33)
+
+        generator = torch.Generator(torch_device)
+
+        with torch.no_grad():
+            sample_1 = model_1(image, generator=generator.manual_seed(0)).sample
+            sample_2 = model_2(image, generator=generator.manual_seed(0)).sample
+
+        assert sample_1.shape == sample_2.shape
+
+        output_slice_1 = sample_1.flatten().float().cpu()
+        output_slice_2 = sample_2.flatten().float().cpu()
+
+        assert numpy_cosine_similarity_distance(output_slice_1, output_slice_2) < 1e-4
+
+    def test_single_file_components(self):
+        model = self.model_class.from_pretrained(self.repo_id)
+        model_single_file = self.model_class.from_single_file(self.ckpt_path, config=self.repo_id)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between pretrained loading and single file loading"
+            )
+
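+    # The defaults asserted at the start of the next test come from the repo
+    # config; the second load then shows that individual config values and the
+    # dtype can be overridden directly in from_single_file.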
"_diffusers_version"] + for param_name, param_value in model_single_file.config.items(): + if param_name in PARAMS_TO_IGNORE: + continue + assert model.config[param_name] == param_value, ( + f"{param_name} differs between pretrained loading and single file loading" + ) + + def test_single_file_arguments(self): + model_default = self.model_class.from_single_file(self.ckpt_path, config=self.repo_id) + + assert model_default.config.scaling_factor == 0.18215 + assert model_default.config.sample_size == 256 + assert model_default.dtype == torch.float32 + + scaling_factor = 2.0 + sample_size = 512 + torch_dtype = torch.float16 + + model = self.model_class.from_single_file( + self.ckpt_path, + config=self.repo_id, + sample_size=sample_size, + scaling_factor=scaling_factor, + torch_dtype=torch_dtype, + ) + assert model.config.scaling_factor == scaling_factor + assert model.config.sample_size == sample_size + assert model.dtype == torch_dtype diff --git a/pythonProject/diffusers-main/tests/single_file/test_model_wan_autoencoder_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_model_wan_autoencoder_single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..a1f7155c1072c593ca90441525ec33369da312ee --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_model_wan_autoencoder_single_file.py @@ -0,0 +1,62 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import gc
+import unittest
+
+from diffusers import (
+    AutoencoderKLWan,
+)
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    require_torch_accelerator,
+    torch_device,
+)
+
+
+enable_full_determinism()
+
+
+@require_torch_accelerator
+class AutoencoderKLWanSingleFileTests(unittest.TestCase):
+    model_class = AutoencoderKLWan
+    ckpt_path = (
+        "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors"
+    )
+    repo_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_single_file_components(self):
+        model = self.model_class.from_pretrained(self.repo_id, subfolder="vae")
+        model_single_file = self.model_class.from_single_file(self.ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
diff --git a/pythonProject/diffusers-main/tests/single_file/test_model_wan_transformer3d_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_model_wan_transformer3d_single_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7c758d3d933c36b676be54db1ec1522f74e27b7
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_model_wan_transformer3d_single_file.py
@@ -0,0 +1,94 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import unittest
+
+import torch
+
+from diffusers import (
+    WanTransformer3DModel,
+)
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    require_big_accelerator,
+    require_torch_accelerator,
+    torch_device,
+)
+
+
+enable_full_determinism()
+
+
+@require_torch_accelerator
+class WanTransformer3DModelText2VideoSingleFileTest(unittest.TestCase):
+    model_class = WanTransformer3DModel
+    ckpt_path = "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_t2v_1.3B_bf16.safetensors"
+    repo_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_single_file_components(self):
+        model = self.model_class.from_pretrained(self.repo_id, subfolder="transformer")
+        model_single_file = self.model_class.from_single_file(self.ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
+
+
+@require_big_accelerator
+@require_torch_accelerator
+class WanTransformer3DModelImage2VideoSingleFileTest(unittest.TestCase):
+    model_class = WanTransformer3DModel
+    ckpt_path = "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_i2v_480p_14B_fp8_e4m3fn.safetensors"
+    repo_id = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
+    torch_dtype = torch.float8_e4m3fn
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_single_file_components(self):
+        model = self.model_class.from_pretrained(self.repo_id, subfolder="transformer", torch_dtype=self.torch_dtype)
+        model_single_file = self.model_class.from_single_file(self.ckpt_path, torch_dtype=self.torch_dtype)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert model.config[param_name] == param_value, (
+                f"{param_name} differs between single file loading and pretrained loading"
+            )
diff --git a/pythonProject/diffusers-main/tests/single_file/test_sana_transformer.py b/pythonProject/diffusers-main/tests/single_file/test_sana_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1543ba17137ab7d4647d9e7b019f33773c85124
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_sana_transformer.py
@@ -0,0 +1,60 @@
+import gc
+import unittest
+
+from diffusers import (
+    SanaTransformer2DModel,
+)
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    require_torch_accelerator,
+    torch_device,
+)
+
+
+enable_full_determinism()
+
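+
+# Every entry in `alternate_keys_ckpt_paths` is loaded by `test_checkpoint_loading`
+# below as a smoke test of `from_single_file` checkpoint-key handling.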
"https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px/blob/main/checkpoints/Sana_1600M_1024px.pth" + ] + + repo_id = "Efficient-Large-Model/Sana_1600M_1024px_diffusers" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_single_file_components(self): + model = self.model_class.from_pretrained(self.repo_id, subfolder="transformer") + model_single_file = self.model_class.from_single_file(self.ckpt_path) + + PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"] + for param_name, param_value in model_single_file.config.items(): + if param_name in PARAMS_TO_IGNORE: + continue + assert model.config[param_name] == param_value, ( + f"{param_name} differs between single file loading and pretrained loading" + ) + + def test_checkpoint_loading(self): + for ckpt_path in self.alternate_keys_ckpt_paths: + backend_empty_cache(torch_device) + model = self.model_class.from_single_file(ckpt_path) + + del model + gc.collect() + backend_empty_cache(torch_device) diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_controlnet_img2img_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_controlnet_img2img_single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..e558eeaf6f472a14f8189298ebc7c0f292119738 --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_controlnet_img2img_single_file.py @@ -0,0 +1,197 @@ +import gc +import tempfile +import unittest + +import torch + +from diffusers import ControlNetModel, StableDiffusionControlNetPipeline +from diffusers.loaders.single_file_utils import _extract_repo_id_and_weights_name +from diffusers.utils import load_image + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, + torch_device, +) +from .single_file_testing_utils import ( + SDSingleFileTesterMixin, + download_diffusers_config, + download_original_config, + download_single_file_checkpoint, +) + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionControlNetPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionControlNetPipeline + ckpt_path = ( + "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors" + ) + original_config = ( + "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml" + ) + repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/sketch-mountains-input.png" + ) + control_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" + ).resize((512, 512)) + prompt = "bird" + + inputs = { + "prompt": prompt, + "image": init_image, + "control_image": 
+        inputs = {
+            "prompt": prompt,
+            "image": init_image,
+            "control_image": control_image,
+            "generator": generator,
+            "num_inference_steps": 3,
+            "strength": 0.75,
+            "guidance_scale": 7.5,
+            "output_type": "np",
+        }
+        return inputs
+
+    def test_single_file_format_inference_is_same_as_pretrained(self):
+        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
+        pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet)
+        pipe.unet.set_default_attn_processor()
+        pipe.enable_model_cpu_offload(device=torch_device)
+
+        pipe_sf = self.pipeline_class.from_single_file(
+            self.ckpt_path,
+            controlnet=controlnet,
+        )
+        pipe_sf.unet.set_default_attn_processor()
+        pipe_sf.enable_model_cpu_offload(device=torch_device)
+
+        inputs = self.get_inputs(torch_device)
+        output = pipe(**inputs).images[0]
+
+        inputs = self.get_inputs(torch_device)
+        output_sf = pipe_sf(**inputs).images[0]
+
+        max_diff = numpy_cosine_similarity_distance(output_sf.flatten(), output.flatten())
+        assert max_diff < 1e-3
+
+    def test_single_file_components(self):
+        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
+        pipe = self.pipeline_class.from_pretrained(
+            self.repo_id, variant="fp16", safety_checker=None, controlnet=controlnet
+        )
+        pipe_single_file = self.pipeline_class.from_single_file(
+            self.ckpt_path,
+            safety_checker=None,
+            controlnet=controlnet,
+        )
+
+        super()._compare_component_configs(pipe, pipe_single_file)
+
+    def test_single_file_components_local_files_only(self):
+        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
+        pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo_id, weights_name = _extract_repo_id_and_weights_name(self.ckpt_path)
+            local_ckpt_path = download_single_file_checkpoint(repo_id, weights_name, tmpdir)
+
+            pipe_single_file = self.pipeline_class.from_single_file(
+                local_ckpt_path, controlnet=controlnet, safety_checker=None, local_files_only=True
+            )
+
+        super()._compare_component_configs(pipe, pipe_single_file)
+
+    def test_single_file_components_with_original_config(self):
+        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", variant="fp16")
+        pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet)
+        pipe_single_file = self.pipeline_class.from_single_file(
+            self.ckpt_path, controlnet=controlnet, safety_checker=None, original_config=self.original_config
+        )
+
+        super()._compare_component_configs(pipe, pipe_single_file)
+
+    def test_single_file_components_with_original_config_local_files_only(self):
+        controlnet = ControlNetModel.from_pretrained(
+            "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16, variant="fp16"
+        )
+        pipe = self.pipeline_class.from_pretrained(
+            self.repo_id,
+            controlnet=controlnet,
+        )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo_id, weights_name = _extract_repo_id_and_weights_name(self.ckpt_path)
+            local_ckpt_path = download_single_file_checkpoint(repo_id, weights_name, tmpdir)
+
+            local_original_config = download_original_config(self.original_config, tmpdir)
+
+            pipe_single_file = self.pipeline_class.from_single_file(
+                local_ckpt_path,
+                original_config=local_original_config,
+                controlnet=controlnet,
+                safety_checker=None,
+                local_files_only=True,
+            )
+        super()._compare_component_configs(pipe, pipe_single_file)
+
+    def test_single_file_components_with_diffusers_config(self):
+        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", variant="fp16")
+        pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet)
+        pipe_single_file = self.pipeline_class.from_single_file(
+            self.ckpt_path, controlnet=controlnet, safety_checker=None, config=self.repo_id
+        )
+
+        super()._compare_component_configs(pipe, pipe_single_file)
+
+    def test_single_file_components_with_diffusers_config_local_files_only(self):
+        controlnet = ControlNetModel.from_pretrained(
+            "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16, variant="fp16"
+        )
+        pipe = self.pipeline_class.from_pretrained(
+            self.repo_id,
+            controlnet=controlnet,
+        )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo_id, weights_name = _extract_repo_id_and_weights_name(self.ckpt_path)
+            local_ckpt_path = download_single_file_checkpoint(repo_id, weights_name, tmpdir)
+
+            local_diffusers_config = download_diffusers_config(self.repo_id, tmpdir)
+
+            pipe_single_file = self.pipeline_class.from_single_file(
+                local_ckpt_path,
+                config=local_diffusers_config,
+                safety_checker=None,
+                controlnet=controlnet,
+                local_files_only=True,
+            )
+        super()._compare_component_configs(pipe, pipe_single_file)
+
+    def test_single_file_setting_pipeline_dtype_to_fp16(self):
+        controlnet = ControlNetModel.from_pretrained(
+            "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16, variant="fp16"
+        )
+        single_file_pipe = self.pipeline_class.from_single_file(
+            self.ckpt_path, controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
+        )
+        super().test_single_file_setting_pipeline_dtype_to_fp16(single_file_pipe)
diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_controlnet_inpaint_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_controlnet_inpaint_single_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..54224f51a9b5418db6bb93e15ce004866564f15a
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_controlnet_inpaint_single_file.py
@@ -0,0 +1,197 @@
+import gc
+import tempfile
+import unittest
+
+import torch
+
+from diffusers import ControlNetModel, StableDiffusionControlNetInpaintPipeline
+from diffusers.loaders.single_file_utils import _extract_repo_id_and_weights_name
+from diffusers.utils import load_image
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    numpy_cosine_similarity_distance,
+    require_torch_accelerator,
+    slow,
+    torch_device,
+)
+from .single_file_testing_utils import (
+    SDSingleFileTesterMixin,
+    download_diffusers_config,
+    download_original_config,
+    download_single_file_checkpoint,
+)
+
+
+enable_full_determinism()
+
+
+@slow
+@require_torch_accelerator
+class StableDiffusionControlNetInpaintPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
+    pipeline_class = StableDiffusionControlNetInpaintPipeline
+    ckpt_path = "https://huggingface.co/botp/stable-diffusion-v1-5-inpainting/blob/main/sd-v1-5-inpainting.ckpt"
+    original_config = "https://raw.githubusercontent.com/runwayml/stable-diffusion/main/configs/stable-diffusion/v1-inpainting-inference.yaml"
+    repo_id = "stable-diffusion-v1-5/stable-diffusion-inpainting"
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def get_inputs(self):
+        control_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" + ).resize((512, 512)) + image = load_image( + "https://huggingface.co/lllyasviel/sd-controlnet-canny/resolve/main/images/bird.png" + ).resize((512, 512)) + mask_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_mask.png" + ).resize((512, 512)) + + inputs = { + "prompt": "bird", + "image": image, + "control_image": control_image, + "mask_image": mask_image, + "generator": torch.Generator(device="cpu").manual_seed(0), + "num_inference_steps": 3, + "output_type": "np", + } + + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny") + pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet, safety_checker=None) + pipe.unet.set_default_attn_processor() + pipe.enable_model_cpu_offload(device=torch_device) + + pipe_sf = self.pipeline_class.from_single_file(self.ckpt_path, controlnet=controlnet, safety_checker=None) + pipe_sf.unet.set_default_attn_processor() + pipe_sf.enable_model_cpu_offload(device=torch_device) + + inputs = self.get_inputs() + output = pipe(**inputs).images[0] + + inputs = self.get_inputs() + output_sf = pipe_sf(**inputs).images[0] + + max_diff = numpy_cosine_similarity_distance(output_sf.flatten(), output.flatten()) + assert max_diff < 2e-3 + + def test_single_file_components(self): + controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny") + pipe = self.pipeline_class.from_pretrained( + self.repo_id, variant="fp16", safety_checker=None, controlnet=controlnet + ) + pipe_single_file = self.pipeline_class.from_single_file( + self.ckpt_path, + safety_checker=None, + controlnet=controlnet, + ) + + super()._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_local_files_only(self): + controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny") + pipe = self.pipeline_class.from_pretrained(self.repo_id, safety_checker=None, controlnet=controlnet) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + + pipe_single_file = self.pipeline_class.from_single_file( + local_ckpt_path, controlnet=controlnet, safety_checker=None, local_files_only=True + ) + + super()._compare_component_configs(pipe, pipe_single_file) + + @unittest.skip("runwayml original config repo does not exist") + def test_single_file_components_with_original_config(self): + controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", variant="fp16") + pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet) + pipe_single_file = self.pipeline_class.from_single_file( + self.ckpt_path, controlnet=controlnet, original_config=self.original_config + ) + + super()._compare_component_configs(pipe, pipe_single_file) + + @unittest.skip("runwayml original config repo does not exist") + def test_single_file_components_with_original_config_local_files_only(self): + controlnet = ControlNetModel.from_pretrained( + "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16, variant="fp16" + ) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + controlnet=controlnet, + safety_checker=None, + ) + + with 
+    @unittest.skip("runwayml original config repo does not exist")
+    def test_single_file_components_with_original_config_local_files_only(self):
+        controlnet = ControlNetModel.from_pretrained(
+            "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16, variant="fp16"
+        )
+        pipe = self.pipeline_class.from_pretrained(
+            self.repo_id,
+            controlnet=controlnet,
+            safety_checker=None,
+        )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path)
+            local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir)
+            local_original_config = download_original_config(self.original_config, tmpdir)
+
+            pipe_single_file = self.pipeline_class.from_single_file(
+                local_ckpt_path,
+                original_config=local_original_config,
+                controlnet=controlnet,
+                safety_checker=None,
+                local_files_only=True,
+            )
+        super()._compare_component_configs(pipe, pipe_single_file)
+
+    def test_single_file_components_with_diffusers_config(self):
+        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", variant="fp16")
+        pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet)
+        pipe_single_file = self.pipeline_class.from_single_file(
+            self.ckpt_path,
+            controlnet=controlnet,
+            config=self.repo_id,
+        )
+
+        super()._compare_component_configs(pipe, pipe_single_file)
+
+    def test_single_file_components_with_diffusers_config_local_files_only(self):
+        controlnet = ControlNetModel.from_pretrained(
+            "lllyasviel/control_v11p_sd15_canny",
+            torch_dtype=torch.float16,
+            variant="fp16",
+        )
+        pipe = self.pipeline_class.from_pretrained(
+            self.repo_id,
+            controlnet=controlnet,
+            safety_checker=None,
+        )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path)
+            local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir)
+            local_diffusers_config = download_diffusers_config(self.repo_id, tmpdir)
+
+            pipe_single_file = self.pipeline_class.from_single_file(
+                local_ckpt_path,
+                config=local_diffusers_config,
+                controlnet=controlnet,
+                safety_checker=None,
+                local_files_only=True,
+            )
+        super()._compare_component_configs(pipe, pipe_single_file)
+
+    def test_single_file_setting_pipeline_dtype_to_fp16(self):
+        controlnet = ControlNetModel.from_pretrained(
+            "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16, variant="fp16"
+        )
+        single_file_pipe = self.pipeline_class.from_single_file(
+            self.ckpt_path, controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
+        )
+        super().test_single_file_setting_pipeline_dtype_to_fp16(single_file_pipe)
diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_controlnet_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_controlnet_single_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..e90e648a9de9bded6c9641912cd46e2234dfc474
--- /dev/null
+++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_controlnet_single_file.py
@@ -0,0 +1,185 @@
+import gc
+import tempfile
+import unittest
+
+import torch
+
+from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
+from diffusers.loaders.single_file_utils import _extract_repo_id_and_weights_name
+from diffusers.utils import load_image
+
+from ..testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    numpy_cosine_similarity_distance,
+    require_torch_accelerator,
+    slow,
+    torch_device,
+)
+from .single_file_testing_utils import (
+    SDSingleFileTesterMixin,
+    download_diffusers_config,
+    download_original_config,
+    download_single_file_checkpoint,
+)
+
+
+enable_full_determinism()
+
+
+@slow
+@require_torch_accelerator
+class StableDiffusionControlNetPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
+    pipeline_class = StableDiffusionControlNetPipeline
ckpt_path = ( + "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors" + ) + original_config = ( + "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml" + ) + repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self): + control_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" + ).resize((512, 512)) + inputs = { + "prompt": "bird", + "image": control_image, + "generator": torch.Generator(device="cpu").manual_seed(0), + "num_inference_steps": 3, + "output_type": "np", + } + + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny") + pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet) + pipe.unet.set_default_attn_processor() + pipe.enable_model_cpu_offload(device=torch_device) + + pipe_sf = self.pipeline_class.from_single_file( + self.ckpt_path, + controlnet=controlnet, + ) + pipe_sf.unet.set_default_attn_processor() + pipe_sf.enable_model_cpu_offload(device=torch_device) + + inputs = self.get_inputs() + output = pipe(**inputs).images[0] + + inputs = self.get_inputs() + output_sf = pipe_sf(**inputs).images[0] + + max_diff = numpy_cosine_similarity_distance(output_sf.flatten(), output.flatten()) + assert max_diff < 1e-3 + + def test_single_file_components(self): + controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny") + pipe = self.pipeline_class.from_pretrained( + self.repo_id, variant="fp16", safety_checker=None, controlnet=controlnet + ) + pipe_single_file = self.pipeline_class.from_single_file( + self.ckpt_path, + safety_checker=None, + controlnet=controlnet, + ) + + super()._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_local_files_only(self): + controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny") + pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + + pipe_single_file = self.pipeline_class.from_single_file( + local_ckpt_path, controlnet=controlnet, local_files_only=True + ) + + super()._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_original_config(self): + controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", variant="fp16") + pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet) + pipe_single_file = self.pipeline_class.from_single_file( + self.ckpt_path, controlnet=controlnet, original_config=self.original_config + ) + + super()._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_original_config_local_files_only(self): + controlnet = ControlNetModel.from_pretrained( + "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16, variant="fp16" + ) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + controlnet=controlnet, + ) + + with tempfile.TemporaryDirectory() as 
tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + local_original_config = download_original_config(self.original_config, tmpdir) + + pipe_single_file = self.pipeline_class.from_single_file( + local_ckpt_path, original_config=local_original_config, controlnet=controlnet, local_files_only=True + ) + pipe_single_file.scheduler = pipe.scheduler + + super()._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_diffusers_config(self): + controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", variant="fp16") + pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet) + pipe_single_file = self.pipeline_class.from_single_file( + self.ckpt_path, controlnet=controlnet, safety_checker=None, config=self.repo_id + ) + + super()._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_diffusers_config_local_files_only(self): + controlnet = ControlNetModel.from_pretrained( + "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16, variant="fp16" + ) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + controlnet=controlnet, + safety_checker=None, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + local_diffusers_config = download_diffusers_config(self.repo_id, tmpdir) + + pipe_single_file = self.pipeline_class.from_single_file( + local_ckpt_path, + config=local_diffusers_config, + controlnet=controlnet, + safety_checker=None, + local_files_only=True, + ) + super()._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_setting_pipeline_dtype_to_fp16(self): + controlnet = ControlNetModel.from_pretrained( + "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16, variant="fp16" + ) + single_file_pipe = self.pipeline_class.from_single_file( + self.ckpt_path, controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16 + ) + super().test_single_file_setting_pipeline_dtype_to_fp16(single_file_pipe) diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_img2img_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_img2img_single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..387f09471dd793ab9e3058e3f8345f19298acd93 --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_img2img_single_file.py @@ -0,0 +1,103 @@ +import gc +import unittest + +import torch + +from diffusers import ( + StableDiffusionImg2ImgPipeline, +) +from diffusers.utils import load_image + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + require_torch_accelerator, + slow, + torch_device, +) +from .single_file_testing_utils import SDSingleFileTesterMixin + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionImg2ImgPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionImg2ImgPipeline + ckpt_path = ( + "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors" + ) + original_config = ( + "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml" + ) + 
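+ # repo_id is the diffusers-format repo used to build the from_pretrained reference pipeline.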
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/sketch-mountains-input.png" + ) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "image": init_image, + "generator": generator, + "num_inference_steps": 3, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) + + +@slow +@require_torch_accelerator +class StableDiffusion21Img2ImgPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionImg2ImgPipeline + ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-2-1/blob/main/v2-1_768-ema-pruned.safetensors" + original_config = "https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/v2-inference-v.yaml" + repo_id = "stabilityai/stable-diffusion-2-1" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/sketch-mountains-input.png" + ) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "image": init_image, + "generator": generator, + "num_inference_steps": 3, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_inpaint_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_inpaint_single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..84636ec0f0fa2f7106771e831a4eaf379854baf1 --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_inpaint_single_file.py @@ -0,0 +1,124 @@ +import gc +import unittest + +import torch + +from diffusers import ( + StableDiffusionInpaintPipeline, +) +from diffusers.utils import load_image + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + require_torch_accelerator, + slow, + torch_device, +) +from .single_file_testing_utils import SDSingleFileTesterMixin + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionInpaintPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionInpaintPipeline + ckpt_path = "https://huggingface.co/botp/stable-diffusion-v1-5-inpainting/blob/main/sd-v1-5-inpainting.ckpt" + original_config = 
"https://raw.githubusercontent.com/runwayml/stable-diffusion/main/configs/stable-diffusion/v1-inpainting-inference.yaml" + repo_id = "botp/stable-diffusion-v1-5-inpainting" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_image.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_mask.png" + ) + inputs = { + "prompt": "Face of a yellow cat, high resolution, sitting on a park bench", + "image": init_image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) + + def test_single_file_loading_4_channel_unet(self): + # Test loading single file inpaint with a 4 channel UNet + ckpt_path = "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors" + pipe = self.pipeline_class.from_single_file(ckpt_path) + + assert pipe.unet.config.in_channels == 4 + + @unittest.skip("runwayml original config has been removed") + def test_single_file_components_with_original_config(self): + return + + @unittest.skip("runwayml original config has been removed") + def test_single_file_components_with_original_config_local_files_only(self): + return + + +@slow +@require_torch_accelerator +class StableDiffusion21InpaintPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionInpaintPipeline + ckpt_path = ( + "https://huggingface.co/stabilityai/stable-diffusion-2-inpainting/blob/main/512-inpainting-ema.safetensors" + ) + original_config = "https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/v2-inpainting-inference.yaml" + repo_id = "stabilityai/stable-diffusion-2-inpainting" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_image.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_inpaint/input_bench_mask.png" + ) + inputs = { + "prompt": "Face of a yellow cat, high resolution, sitting on a park bench", + "image": init_image, + "mask_image": mask_image, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) diff --git 
a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..4601b75c3ab61bb9e932af2eb872385b654954b1 --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_single_file.py @@ -0,0 +1,164 @@ +import gc +import tempfile +import unittest + +import torch + +from diffusers import EulerDiscreteScheduler, StableDiffusionInstructPix2PixPipeline, StableDiffusionPipeline +from diffusers.loaders.single_file_utils import _extract_repo_id_and_weights_name +from diffusers.utils import load_image + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + nightly, + require_torch_accelerator, + slow, + torch_device, +) +from .single_file_testing_utils import ( + SDSingleFileTesterMixin, + download_original_config, + download_single_file_checkpoint, +) + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionPipeline + ckpt_path = ( + "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors" + ) + original_config = ( + "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml" + ) + repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "generator": generator, + "num_inference_steps": 2, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) + + def test_single_file_legacy_scheduler_loading(self): + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + local_original_config = download_original_config(self.original_config, tmpdir) + + pipe = self.pipeline_class.from_single_file( + local_ckpt_path, + original_config=local_original_config, + cache_dir=tmpdir, + local_files_only=True, + scheduler_type="euler", + ) + + # Default is PNDM for this checkpoint + assert isinstance(pipe.scheduler, EulerDiscreteScheduler) + + def test_single_file_legacy_scaling_factor(self): + new_scaling_factor = 10.0 + init_pipe = self.pipeline_class.from_single_file(self.ckpt_path) + pipe = self.pipeline_class.from_single_file(self.ckpt_path, scaling_factor=new_scaling_factor) + + assert init_pipe.vae.config.scaling_factor != new_scaling_factor + assert pipe.vae.config.scaling_factor == new_scaling_factor + + +@slow +class StableDiffusion21PipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionPipeline + ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-2-1/blob/main/v2-1_768-ema-pruned.safetensors" + original_config = 
"https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/v2-inference-v.yaml" + repo_id = "stabilityai/stable-diffusion-2-1" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "generator": generator, + "num_inference_steps": 2, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) + + +@nightly +@slow +@require_torch_accelerator +class StableDiffusionInstructPix2PixPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionInstructPix2PixPipeline + ckpt_path = "https://huggingface.co/timbrooks/instruct-pix2pix/blob/main/instruct-pix2pix-00-22000.safetensors" + original_config = ( + "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/refs/heads/main/configs/generate.yaml" + ) + repo_id = "timbrooks/instruct-pix2pix" + single_file_kwargs = {"extract_ema": True} + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main/stable_diffusion_pix2pix/example.jpg" + ) + inputs = { + "prompt": "turn him into a cyborg", + "image": image, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "image_guidance_scale": 1.0, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_upscale_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_upscale_single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..39ec7b0194a668859c54556253bbdb1bc871a6ba --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_upscale_single_file.py @@ -0,0 +1,87 @@ +import gc +import unittest + +import pytest +import torch + +from diffusers import ( + StableDiffusionUpscalePipeline, +) +from diffusers.utils import load_image + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, + torch_device, +) +from .single_file_testing_utils import SDSingleFileTesterMixin + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionUpscalePipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionUpscalePipeline + ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler/blob/main/x4-upscaler-ema.safetensors" + original_config = 
"https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/x4-upscaling.yaml" + repo_id = "stabilityai/stable-diffusion-x4-upscaler" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def test_single_file_format_inference_is_same_as_pretrained(self): + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/sd2-upscale/low_res_cat.png" + ) + + prompt = "a cat sitting on a park bench" + pipe = StableDiffusionUpscalePipeline.from_pretrained(self.repo_id) + pipe.enable_model_cpu_offload(device=torch_device) + + generator = torch.Generator("cpu").manual_seed(0) + output = pipe(prompt=prompt, image=image, generator=generator, output_type="np", num_inference_steps=3) + image_from_pretrained = output.images[0] + + pipe_from_single_file = StableDiffusionUpscalePipeline.from_single_file(self.ckpt_path) + pipe_from_single_file.enable_model_cpu_offload(device=torch_device) + + generator = torch.Generator("cpu").manual_seed(0) + output_from_single_file = pipe_from_single_file( + prompt=prompt, image=image, generator=generator, output_type="np", num_inference_steps=3 + ) + image_from_single_file = output_from_single_file.images[0] + + assert image_from_pretrained.shape == (512, 512, 3) + assert image_from_single_file.shape == (512, 512, 3) + assert ( + numpy_cosine_similarity_distance(image_from_pretrained.flatten(), image_from_single_file.flatten()) < 1e-3 + ) + + @pytest.mark.xfail( + condition=True, + reason="Test fails because of mismatches in the configs but it is very hard to properly fix this considering downstream usecase.", + strict=True, + ) + def test_single_file_components_with_original_config(self): + super().test_single_file_components_with_original_config() + + @pytest.mark.xfail( + condition=True, + reason="Test fails because of mismatches in the configs but it is very hard to properly fix this considering downstream usecase.", + strict=True, + ) + def test_single_file_components_with_original_config_local_files_only(self): + super().test_single_file_components_with_original_config_local_files_only() diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_adapter_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_adapter_single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..3de9ee736417fb6854ec82c691c8a64540e15928 --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_adapter_single_file.py @@ -0,0 +1,213 @@ +import gc +import tempfile +import unittest + +import torch + +from diffusers import ( + StableDiffusionXLAdapterPipeline, + T2IAdapter, +) +from diffusers.loaders.single_file_utils import _extract_repo_id_and_weights_name +from diffusers.utils import load_image + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, + torch_device, +) +from .single_file_testing_utils import ( + SDXLSingleFileTesterMixin, + download_diffusers_config, + download_original_config, + download_single_file_checkpoint, +) + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionXLAdapterPipelineSingleFileSlowTests(unittest.TestCase, SDXLSingleFileTesterMixin): + pipeline_class = StableDiffusionXLAdapterPipeline + 
ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors" + repo_id = "stabilityai/stable-diffusion-xl-base-1.0" + original_config = ( + "https://raw.githubusercontent.com/Stability-AI/generative-models/main/configs/inference/sd_xl_base.yaml" + ) + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self): + prompt = "toy" + generator = torch.Generator(device="cpu").manual_seed(0) + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png" + ) + + inputs = { + "prompt": prompt, + "image": image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 7.5, + "output_type": "np", + } + + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16) + pipe_single_file = StableDiffusionXLAdapterPipeline.from_single_file( + self.ckpt_path, + adapter=adapter, + torch_dtype=torch.float16, + safety_checker=None, + ) + pipe_single_file.enable_model_cpu_offload(device=torch_device) + pipe_single_file.set_progress_bar_config(disable=None) + + inputs = self.get_inputs() + images_single_file = pipe_single_file(**inputs).images[0] + + pipe = StableDiffusionXLAdapterPipeline.from_pretrained( + self.repo_id, + adapter=adapter, + torch_dtype=torch.float16, + safety_checker=None, + ) + pipe.enable_model_cpu_offload(device=torch_device) + + inputs = self.get_inputs() + images = pipe(**inputs).images[0] + + assert images_single_file.shape == (768, 512, 3) + assert images.shape == (768, 512, 3) + + max_diff = numpy_cosine_similarity_distance(images.flatten(), images_single_file.flatten()) + assert max_diff < 5e-3 + + def test_single_file_components(self): + adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + adapter=adapter, + torch_dtype=torch.float16, + ) + + pipe_single_file = self.pipeline_class.from_single_file(self.ckpt_path, safety_checker=None, adapter=adapter) + super().test_single_file_components(pipe, pipe_single_file) + + def test_single_file_components_local_files_only(self): + adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + adapter=adapter, + torch_dtype=torch.float16, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + + single_file_pipe = self.pipeline_class.from_single_file( + local_ckpt_path, adapter=adapter, safety_checker=None, local_files_only=True + ) + + self._compare_component_configs(pipe, single_file_pipe) + + def test_single_file_components_with_diffusers_config(self): + adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + adapter=adapter, + torch_dtype=torch.float16, + safety_checker=None, + ) + + pipe_single_file = self.pipeline_class.from_single_file(self.ckpt_path, config=self.repo_id, 
adapter=adapter) + self._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_diffusers_config_local_files_only(self): + adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + adapter=adapter, + torch_dtype=torch.float16, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + local_diffusers_config = download_diffusers_config(self.repo_id, tmpdir) + + pipe_single_file = self.pipeline_class.from_single_file( + local_ckpt_path, + config=local_diffusers_config, + adapter=adapter, + safety_checker=None, + local_files_only=True, + ) + self._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_original_config(self): + adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + adapter=adapter, + torch_dtype=torch.float16, + safety_checker=None, + ) + + pipe_single_file = self.pipeline_class.from_single_file( + self.ckpt_path, original_config=self.original_config, adapter=adapter + ) + self._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_original_config_local_files_only(self): + adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + adapter=adapter, + torch_dtype=torch.float16, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + local_original_config = download_original_config(self.original_config, tmpdir) + + pipe_single_file = self.pipeline_class.from_single_file( + local_ckpt_path, + original_config=local_original_config, + adapter=adapter, + safety_checker=None, + local_files_only=True, + ) + self._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_setting_pipeline_dtype_to_fp16(self): + adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16) + + single_file_pipe = self.pipeline_class.from_single_file( + self.ckpt_path, adapter=adapter, torch_dtype=torch.float16 + ) + super().test_single_file_setting_pipeline_dtype_to_fp16(single_file_pipe) diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_controlnet_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_controlnet_single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..a0a1aba1030fa8858e1274147bc1af4ce7beacb0 --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_controlnet_single_file.py @@ -0,0 +1,208 @@ +import gc +import tempfile +import unittest + +import torch + +from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline +from diffusers.loaders.single_file_utils import _extract_repo_id_and_weights_name +from diffusers.utils import load_image + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, 
+ torch_device, +) +from .single_file_testing_utils import ( + SDXLSingleFileTesterMixin, + download_diffusers_config, + download_single_file_checkpoint, +) + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionXLControlNetPipelineSingleFileSlowTests(unittest.TestCase, SDXLSingleFileTesterMixin): + pipeline_class = StableDiffusionXLControlNetPipeline + ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors" + repo_id = "stabilityai/stable-diffusion-xl-base-1.0" + original_config = ( + "https://raw.githubusercontent.com/Stability-AI/generative-models/main/configs/inference/sd_xl_base.yaml" + ) + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/stormtrooper_depth.png" + ) + inputs = { + "prompt": "Stormtrooper's lecture", + "image": image, + "generator": generator, + "num_inference_steps": 2, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16) + pipe_single_file = self.pipeline_class.from_single_file( + self.ckpt_path, controlnet=controlnet, torch_dtype=torch.float16 + ) + pipe_single_file.unet.set_default_attn_processor() + pipe_single_file.enable_model_cpu_offload(device=torch_device) + pipe_single_file.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + single_file_images = pipe_single_file(**inputs).images[0] + + pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet, torch_dtype=torch.float16) + pipe.unet.set_default_attn_processor() + pipe.enable_model_cpu_offload(device=torch_device) + + inputs = self.get_inputs(torch_device) + images = pipe(**inputs).images[0] + + assert images.shape == (512, 512, 3) + assert single_file_images.shape == (512, 512, 3) + + # Compare the full images, not just their first rows, since both shapes were asserted above. + max_diff = numpy_cosine_similarity_distance(images.flatten(), single_file_images.flatten()) + assert max_diff < 5e-2 + + def test_single_file_components(self): + controlnet = ControlNetModel.from_pretrained( + "diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16, variant="fp16" + ) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + controlnet=controlnet, + torch_dtype=torch.float16, + ) + + pipe_single_file = self.pipeline_class.from_single_file(self.ckpt_path, controlnet=controlnet) + super().test_single_file_components(pipe, pipe_single_file) + + def test_single_file_components_local_files_only(self): + controlnet = ControlNetModel.from_pretrained( + "diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16, variant="fp16" + ) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + controlnet=controlnet, + torch_dtype=torch.float16, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + + single_file_pipe =
self.pipeline_class.from_single_file( + local_ckpt_path, controlnet=controlnet, safety_checker=None, local_files_only=True + ) + + self._compare_component_configs(pipe, single_file_pipe) + + def test_single_file_components_with_original_config(self): + controlnet = ControlNetModel.from_pretrained( + "diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16, variant="fp16" + ) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + controlnet=controlnet, + torch_dtype=torch.float16, + ) + + pipe_single_file = self.pipeline_class.from_single_file( + self.ckpt_path, + original_config=self.original_config, + controlnet=controlnet, + ) + self._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_original_config_local_files_only(self): + controlnet = ControlNetModel.from_pretrained( + "diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16, variant="fp16" + ) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + variant="fp16", + controlnet=controlnet, + torch_dtype=torch.float16, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + + pipe_single_file = self.pipeline_class.from_single_file( + local_ckpt_path, + safety_checker=None, + controlnet=controlnet, + local_files_only=True, + ) + self._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_diffusers_config(self): + controlnet = ControlNetModel.from_pretrained( + "diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16, variant="fp16" + ) + pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet) + pipe_single_file = self.pipeline_class.from_single_file( + self.ckpt_path, controlnet=controlnet, config=self.repo_id + ) + + super()._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_components_with_diffusers_config_local_files_only(self): + controlnet = ControlNetModel.from_pretrained( + "diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16, variant="fp16" + ) + pipe = self.pipeline_class.from_pretrained( + self.repo_id, + controlnet=controlnet, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + repo_id, weight_name = _extract_repo_id_and_weights_name(self.ckpt_path) + local_ckpt_path = download_single_file_checkpoint(repo_id, weight_name, tmpdir) + local_diffusers_config = download_diffusers_config(self.repo_id, tmpdir) + + pipe_single_file = self.pipeline_class.from_single_file( + local_ckpt_path, + config=local_diffusers_config, + safety_checker=None, + controlnet=controlnet, + local_files_only=True, + ) + super()._compare_component_configs(pipe, pipe_single_file) + + def test_single_file_setting_pipeline_dtype_to_fp16(self): + controlnet = ControlNetModel.from_pretrained( + "diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16, variant="fp16" + ) + single_file_pipe = self.pipeline_class.from_single_file( + self.ckpt_path, controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16 + ) + super().test_single_file_setting_pipeline_dtype_to_fp16(single_file_pipe) diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_img2img_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_img2img_single_file.py new file mode 100644 index 
0000000000000000000000000000000000000000..810f412f8def1bad7ffa5f62e1f5f7da77f33379 --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_img2img_single_file.py @@ -0,0 +1,107 @@ +import gc +import unittest + +import torch + +from diffusers import ( + DDIMScheduler, + StableDiffusionXLImg2ImgPipeline, +) +from diffusers.utils import load_image + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + numpy_cosine_similarity_distance, + require_torch_accelerator, + slow, + torch_device, +) +from .single_file_testing_utils import SDXLSingleFileTesterMixin + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionXLImg2ImgPipelineSingleFileSlowTests(unittest.TestCase, SDXLSingleFileTesterMixin): + pipeline_class = StableDiffusionXLImg2ImgPipeline + ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors" + repo_id = "stabilityai/stable-diffusion-xl-base-1.0" + original_config = ( + "https://raw.githubusercontent.com/Stability-AI/generative-models/main/configs/inference/sd_xl_base.yaml" + ) + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/sketch-mountains-input.png" + ) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "image": init_image, + "generator": generator, + "num_inference_steps": 3, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) + + +@slow +@require_torch_accelerator +class StableDiffusionXLImg2ImgRefinerPipelineSingleFileSlowTests(unittest.TestCase): + pipeline_class = StableDiffusionXLImg2ImgPipeline + ckpt_path = ( + "https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0/blob/main/sd_xl_refiner_1.0.safetensors" + ) + repo_id = "stabilityai/stable-diffusion-xl-refiner-1.0" + original_config = ( + "https://raw.githubusercontent.com/Stability-AI/generative-models/main/configs/inference/sd_xl_refiner.yaml" + ) + + def test_single_file_format_inference_is_same_as_pretrained(self): + init_image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main" + "/stable_diffusion_img2img/sketch-mountains-input.png" + ) + + pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16) + pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) + pipe.unet.set_default_attn_processor() + pipe.enable_model_cpu_offload(device=torch_device) + + generator = torch.Generator(device="cpu").manual_seed(0) + image = pipe( + prompt="mountains", image=init_image, num_inference_steps=5, generator=generator, output_type="np" + ).images[0] + + pipe_single_file = self.pipeline_class.from_single_file(self.ckpt_path, torch_dtype=torch.float16) + pipe_single_file.scheduler = DDIMScheduler.from_config(pipe_single_file.scheduler.config) + pipe_single_file.unet.set_default_attn_processor() + pipe_single_file.enable_model_cpu_offload(device=torch_device) 
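+ # Seed the CPU generator identically to the reference run above so the two outputs are comparable.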
+ + generator = torch.Generator(device="cpu").manual_seed(0) + image_single_file = pipe_single_file( + prompt="mountains", image=init_image, num_inference_steps=5, generator=generator, output_type="np" + ).images[0] + + max_diff = numpy_cosine_similarity_distance(image.flatten(), image_single_file.flatten()) + + assert max_diff < 5e-4 diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_instruct_pix2pix.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_instruct_pix2pix.py new file mode 100644 index 0000000000000000000000000000000000000000..011d59222a5b0c656e74c8c84d9ce946aa62761d --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_instruct_pix2pix.py @@ -0,0 +1,53 @@ +import gc +import unittest + +import torch + +from diffusers import StableDiffusionXLInstructPix2PixPipeline + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + require_torch_accelerator, + slow, + torch_device, +) + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionXLInstructPix2PixPipelineSingleFileSlowTests(unittest.TestCase): + pipeline_class = StableDiffusionXLInstructPix2PixPipeline + ckpt_path = "https://huggingface.co/stabilityai/cosxl/blob/main/cosxl_edit.safetensors" + original_config = None + repo_id = "diffusers/sdxl-instructpix2pix-768" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "generator": generator, + "num_inference_steps": 2, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_single_file_setting_cosxl_edit(self): + # CosXL Edit has no original YAML config, so load with the diffusers config and flag the checkpoint + pipe = self.pipeline_class.from_single_file(self.ckpt_path, config=self.repo_id, is_cosxl_edit=True) + assert pipe.is_cosxl_edit is True diff --git a/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_single_file.py b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..0ad180de17dbb323b2e1e4a61b01238cbe5c76d8 --- /dev/null +++ b/pythonProject/diffusers-main/tests/single_file/test_stable_diffusion_xl_single_file.py @@ -0,0 +1,56 @@ +import gc +import unittest + +import torch + +from diffusers import ( + StableDiffusionXLPipeline, +) + +from ..testing_utils import ( + backend_empty_cache, + enable_full_determinism, + require_torch_accelerator, + slow, + torch_device, +) +from .single_file_testing_utils import SDXLSingleFileTesterMixin + + +enable_full_determinism() + + +@slow +@require_torch_accelerator +class StableDiffusionXLPipelineSingleFileSlowTests(unittest.TestCase, SDXLSingleFileTesterMixin): + pipeline_class = StableDiffusionXLPipeline + ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors" + repo_id = "stabilityai/stable-diffusion-xl-base-1.0" + original_config = ( + "https://raw.githubusercontent.com/Stability-AI/generative-models/main/configs/inference/sd_xl_base.yaml" + ) + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self):
super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + inputs = { + "prompt": "a fantasy landscape, concept art, high resolution", + "generator": generator, + "num_inference_steps": 2, + "strength": 0.75, + "guidance_scale": 7.5, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) diff --git a/pythonProject/diffusers-main/utils/check_config_docstrings.py b/pythonProject/diffusers-main/utils/check_config_docstrings.py new file mode 100644 index 0000000000000000000000000000000000000000..d39fe6a618d4d7e0476f32578a488aa3402abde6 --- /dev/null +++ b/pythonProject/diffusers-main/utils/check_config_docstrings.py @@ -0,0 +1,84 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.util +import inspect +import os +import re + + +# All paths are set with the intent you should run this script from the root of the repo with the command +# python utils/check_config_docstrings.py
PATH_TO_TRANSFORMERS = "src/transformers" + + +# This is to make sure the transformers module imported is the one in the repo. +spec = importlib.util.spec_from_file_location( + "transformers", + os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), + submodule_search_locations=[PATH_TO_TRANSFORMERS], +) +transformers = spec.loader.load_module() + +CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING + +# Regex pattern used to find the checkpoint mentioned in the docstring of `config_class`. +# For example, `[bert-base-uncased](https://huggingface.co/bert-base-uncased)` +_re_checkpoint = re.compile(r"\[(.+?)\]\((https://huggingface\.co/.+?)\)") + + +CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK = { + "CLIPConfigMixin", + "DecisionTransformerConfigMixin", + "EncoderDecoderConfigMixin", + "RagConfigMixin", + "SpeechEncoderDecoderConfigMixin", + "VisionEncoderDecoderConfigMixin", + "VisionTextDualEncoderConfigMixin", +} + + +def check_config_docstrings_have_checkpoints(): + configs_without_checkpoint = [] + + for config_class in list(CONFIG_MAPPING.values()): + checkpoint_found = False + + # source code of `config_class` + config_source = inspect.getsource(config_class) + checkpoints = _re_checkpoint.findall(config_source) + + for checkpoint in checkpoints: + # Each `checkpoint` is a tuple of a checkpoint name and a checkpoint link.
+ # For example, `('bert-base-uncased', 'https://huggingface.co/bert-base-uncased')` + ckpt_name, ckpt_link = checkpoint + + # verify the checkpoint name corresponds to the checkpoint link + ckpt_link_from_name = f"https://huggingface.co/{ckpt_name}" + if ckpt_link == ckpt_link_from_name: + checkpoint_found = True + break + + name = config_class.__name__ + if not checkpoint_found and name not in CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK: + configs_without_checkpoint.append(name) + + if len(configs_without_checkpoint) > 0: + message = "\n".join(sorted(configs_without_checkpoint)) + raise ValueError(f"The following configurations don't contain any valid checkpoint:\n{message}") + + +if __name__ == "__main__": + check_config_docstrings_have_checkpoints() diff --git a/pythonProject/diffusers-main/utils/check_copies.py b/pythonProject/diffusers-main/utils/check_copies.py new file mode 100644 index 0000000000000000000000000000000000000000..001366c1905f5fda8a8e22f38b532013657e8ee3 --- /dev/null +++ b/pythonProject/diffusers-main/utils/check_copies.py @@ -0,0 +1,222 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import glob +import os +import re +import subprocess + + +# All paths are set with the intent you should run this script from the root of the repo with the command +# python utils/check_copies.py +DIFFUSERS_PATH = "src/diffusers" +REPO_PATH = "." + + +def _should_continue(line, indent): + return line.startswith(indent) or len(line) <= 1 or re.search(r"^\s*\)(\s*->.*:|:)\s*$", line) is not None + + +def find_code_in_diffusers(object_name): + """Find and return the code source code of `object_name`.""" + parts = object_name.split(".") + i = 0 + + # First let's find the module where our object lives. + module = parts[i] + while i < len(parts) and not os.path.isfile(os.path.join(DIFFUSERS_PATH, f"{module}.py")): + i += 1 + if i < len(parts): + module = os.path.join(module, parts[i]) + if i >= len(parts): + raise ValueError(f"`object_name` should begin with the name of a module of diffusers but got {object_name}.") + + with open( + os.path.join(DIFFUSERS_PATH, f"{module}.py"), + "r", + encoding="utf-8", + newline="\n", + ) as f: + lines = f.readlines() + + # Now let's find the class / func in the code! + indent = "" + line_index = 0 + for name in parts[i + 1 :]: + while ( + line_index < len(lines) and re.search(rf"^{indent}(class|def)\s+{name}(\(|\:)", lines[line_index]) is None + ): + line_index += 1 + indent += " " + line_index += 1 + + if line_index >= len(lines): + raise ValueError(f" {object_name} does not match any function or class in {module}.") + + # We found the beginning of the class / func, now let's find the end (when the indent diminishes). + start_index = line_index + while line_index < len(lines) and _should_continue(lines[line_index], indent): + line_index += 1 + # Clean up empty lines at the end (if any). 
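+ # (otherwise trailing blank lines would be returned as part of the object's source)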
+ while len(lines[line_index - 1]) <= 1: + line_index -= 1 + + code_lines = lines[start_index:line_index] + return "".join(code_lines) + + +_re_copy_warning = re.compile(r"^(\s*)#\s*Copied from\s+diffusers\.(\S+\.\S+)\s*($|\S.*$)") +_re_replace_pattern = re.compile(r"^\s*(\S+)->(\S+)(\s+.*|$)") +_re_fill_pattern = re.compile(r"<FILL\s+[^>]*>") + + +def get_indent(code): + lines = code.split("\n") + idx = 0 + while idx < len(lines) and len(lines[idx]) == 0: + idx += 1 + if idx < len(lines): + return re.search(r"^(\s*)\S", lines[idx]).groups()[0] + return "" + + +def run_ruff(code): + command = ["ruff", "format", "-", "--config", "pyproject.toml", "--silent"] + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + stdout, _ = process.communicate(input=code.encode()) + return stdout.decode() + + +def stylify(code: str) -> str: + """ + Applies the ruff part of our `make style` command to some code. This formats the code using `ruff format`. + As `ruff` does not provide a Python API, this cannot be done on the fly. + + Args: + code (`str`): The code to format. + + Returns: + `str`: The formatted code. + """ + has_indent = len(get_indent(code)) > 0 + if has_indent: + code = f"class Bla:\n{code}" + formatted_code = run_ruff(code) + return formatted_code[len("class Bla:\n") :] if has_indent else formatted_code + + +def is_copy_consistent(filename, overwrite=False): + """ + Check if the code commented as a copy in `filename` matches the original. + Return the differences or overwrite the content, depending on `overwrite`. + """ + with open(filename, "r", encoding="utf-8", newline="\n") as f: + lines = f.readlines() + diffs = [] + line_index = 0 + # Not a for loop because `lines` is going to change (if `overwrite=True`). + while line_index < len(lines): + search = _re_copy_warning.search(lines[line_index]) + if search is None: + line_index += 1 + continue + + # There is some copied code here, let's retrieve the original. + indent, object_name, replace_pattern = search.groups() + theoretical_code = find_code_in_diffusers(object_name) + theoretical_indent = get_indent(theoretical_code) + + start_index = line_index + 1 if indent == theoretical_indent else line_index + 2 + indent = theoretical_indent + line_index = start_index + + # Loop to check the observed code, stop when the indentation diminishes or we see an `# End copy` comment. + should_continue = True + while line_index < len(lines) and should_continue: + line_index += 1 + if line_index >= len(lines): + break + line = lines[line_index] + should_continue = _should_continue(line, indent) and re.search(f"^{indent}# End copy", line) is None + # Clean up empty lines at the end (if any). + while len(lines[line_index - 1]) <= 1: + line_index -= 1 + + observed_code_lines = lines[start_index:line_index] + observed_code = "".join(observed_code_lines) + + # Remove any nested `Copied from` comments to avoid circular copies + theoretical_code = [line for line in theoretical_code.split("\n") if _re_copy_warning.search(line) is None] + theoretical_code = "\n".join(theoretical_code) + + # Before comparing, use the `replace_pattern` on the original code.
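+ # Each mapping has the form "X1->X2"; an "all-casing" option additionally substitutes + # the lower- and upper-cased variants of the pair.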
+ if len(replace_pattern) > 0: + patterns = replace_pattern.replace("with", "").split(",") + patterns = [_re_replace_pattern.search(p) for p in patterns] + for pattern in patterns: + if pattern is None: + continue + obj1, obj2, option = pattern.groups() + theoretical_code = re.sub(obj1, obj2, theoretical_code) + if option.strip() == "all-casing": + theoretical_code = re.sub(obj1.lower(), obj2.lower(), theoretical_code) + theoretical_code = re.sub(obj1.upper(), obj2.upper(), theoretical_code) + + # stylify after replacement. To be able to do that, we need the header (class or function definition) + # from the previous line + theoretical_code = stylify(lines[start_index - 1] + theoretical_code) + theoretical_code = theoretical_code[len(lines[start_index - 1]) :] + + # Test for a diff and act accordingly. + if observed_code != theoretical_code: + diffs.append([object_name, start_index]) + if overwrite: + lines = lines[:start_index] + [theoretical_code] + lines[line_index:] + line_index = start_index + 1 + + if overwrite and len(diffs) > 0: + # Warn the user a file has been modified. + print(f"Detected changes, rewriting {filename}.") + with open(filename, "w", encoding="utf-8", newline="\n") as f: + f.writelines(lines) + return diffs + + +def check_copies(overwrite: bool = False): + all_files = glob.glob(os.path.join(DIFFUSERS_PATH, "**/*.py"), recursive=True) + diffs = [] + for filename in all_files: + new_diffs = is_copy_consistent(filename, overwrite) + diffs += [f"- {filename}: copy does not match {d[0]} at line {d[1]}" for d in new_diffs] + if not overwrite and len(diffs) > 0: + diff = "\n".join(diffs) + raise Exception( + "Found the following copy inconsistencies:\n" + + diff + + "\nRun `make fix-copies` or `python utils/check_copies.py --fix_and_overwrite` to fix them." + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--fix_and_overwrite", + action="store_true", + help="Whether to fix inconsistencies.", + ) + args = parser.parse_args() + + check_copies(args.fix_and_overwrite) diff --git a/pythonProject/diffusers-main/utils/check_doc_toc.py b/pythonProject/diffusers-main/utils/check_doc_toc.py new file mode 100644 index 0000000000000000000000000000000000000000..0dd02cde86c1286c7fb2cc00d469d31714ce3a09 --- /dev/null +++ b/pythonProject/diffusers-main/utils/check_doc_toc.py @@ -0,0 +1,210 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +from collections import defaultdict + +import yaml + + +PATH_TO_TOC = "docs/source/en/_toctree.yml" + + +def clean_doc_toc(doc_list): + """ + Cleans the table of content of the model documentation by removing duplicates and sorting models alphabetically. 
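+ The "overview" entry, when present, is kept first; all other entries are sorted by title.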
+ """ + counts = defaultdict(int) + overview_doc = [] + new_doc_list = [] + for doc in doc_list: + if "local" in doc: + counts[doc["local"]] += 1 + + if doc["title"].lower() == "overview": + overview_doc.append({"local": doc["local"], "title": doc["title"]}) + else: + new_doc_list.append(doc) + + doc_list = new_doc_list + duplicates = [key for key, value in counts.items() if value > 1] + + new_doc = [] + for duplicate_key in duplicates: + titles = list({doc["title"] for doc in doc_list if doc["local"] == duplicate_key}) + if len(titles) > 1: + raise ValueError( + f"{duplicate_key} is present several times in the documentation table of content at " + "`docs/source/en/_toctree.yml` with different *Title* values. Choose one of those and remove the " + "others." + ) + # Only add this once + new_doc.append({"local": duplicate_key, "title": titles[0]}) + + # Add none duplicate-keys + new_doc.extend([doc for doc in doc_list if "local" not in counts or counts[doc["local"]] == 1]) + new_doc = sorted(new_doc, key=lambda s: s["title"].lower()) + + # "overview" gets special treatment and is always first + if len(overview_doc) > 1: + raise ValueError("{doc_list} has two 'overview' docs which is not allowed.") + + overview_doc.extend(new_doc) + + # Sort + return overview_doc + + +def check_scheduler_doc(overwrite=False): + with open(PATH_TO_TOC, encoding="utf-8") as f: + content = yaml.safe_load(f.read()) + + # Get to the API doc + api_idx = 0 + while content[api_idx]["title"] != "API": + api_idx += 1 + api_doc = content[api_idx]["sections"] + + # Then to the model doc + scheduler_idx = 0 + while api_doc[scheduler_idx]["title"] != "Schedulers": + scheduler_idx += 1 + + scheduler_doc = api_doc[scheduler_idx]["sections"] + new_scheduler_doc = clean_doc_toc(scheduler_doc) + + diff = False + if new_scheduler_doc != scheduler_doc: + diff = True + if overwrite: + api_doc[scheduler_idx]["sections"] = new_scheduler_doc + + if diff: + if overwrite: + content[api_idx]["sections"] = api_doc + with open(PATH_TO_TOC, "w", encoding="utf-8") as f: + f.write(yaml.dump(content, allow_unicode=True)) + else: + raise ValueError( + "The model doc part of the table of content is not properly sorted, run `make style` to fix this." 
+            )
+
+
+def check_pipeline_doc(overwrite=False):
+    with open(PATH_TO_TOC, encoding="utf-8") as f:
+        content = yaml.safe_load(f.read())
+
+    # Get to the API doc
+    api_idx = 0
+    while content[api_idx]["title"] != "API":
+        api_idx += 1
+    api_doc = content[api_idx]["sections"]
+
+    # Then to the pipeline doc
+    pipeline_idx = 0
+    while api_doc[pipeline_idx]["title"] != "Pipelines":
+        pipeline_idx += 1
+
+    diff = False
+    pipeline_docs = api_doc[pipeline_idx]["sections"]
+    new_pipeline_docs = []
+
+    # Sort sub-pipeline docs
+    for pipeline_doc in pipeline_docs:
+        if "sections" in pipeline_doc:
+            sub_pipeline_doc = pipeline_doc["sections"]
+            new_sub_pipeline_doc = clean_doc_toc(sub_pipeline_doc)
+            if new_sub_pipeline_doc != sub_pipeline_doc:
+                diff = True
+                if overwrite:
+                    pipeline_doc["sections"] = new_sub_pipeline_doc
+        new_pipeline_docs.append(pipeline_doc)
+
+    # Sort the overall pipeline doc
+    new_pipeline_docs = clean_doc_toc(new_pipeline_docs)
+
+    if new_pipeline_docs != pipeline_docs:
+        diff = True
+        if overwrite:
+            api_doc[pipeline_idx]["sections"] = new_pipeline_docs
+
+    if diff:
+        if overwrite:
+            content[api_idx]["sections"] = api_doc
+            with open(PATH_TO_TOC, "w", encoding="utf-8") as f:
+                f.write(yaml.dump(content, allow_unicode=True))
+        else:
+            raise ValueError(
+                "The pipeline doc part of the table of contents is not properly sorted, run `make style` to fix this."
+            )
+
+
+def check_model_doc(overwrite=False):
+    with open(PATH_TO_TOC, encoding="utf-8") as f:
+        content = yaml.safe_load(f.read())
+
+    # Get to the API doc
+    api_idx = 0
+    while content[api_idx]["title"] != "API":
+        api_idx += 1
+    api_doc = content[api_idx]["sections"]
+
+    # Then to the model doc
+    model_idx = 0
+    while api_doc[model_idx]["title"] != "Models":
+        model_idx += 1
+
+    diff = False
+    model_docs = api_doc[model_idx]["sections"]
+    new_model_docs = []
+
+    # Sort sub-model docs
+    for model_doc in model_docs:
+        if "sections" in model_doc:
+            sub_model_doc = model_doc["sections"]
+            new_sub_model_doc = clean_doc_toc(sub_model_doc)
+            if new_sub_model_doc != sub_model_doc:
+                diff = True
+                if overwrite:
+                    model_doc["sections"] = new_sub_model_doc
+        new_model_docs.append(model_doc)
+
+    # Sort the overall model doc
+    new_model_docs = clean_doc_toc(new_model_docs)
+
+    if new_model_docs != model_docs:
+        diff = True
+        if overwrite:
+            api_doc[model_idx]["sections"] = new_model_docs
+
+    if diff:
+        if overwrite:
+            content[api_idx]["sections"] = api_doc
+            with open(PATH_TO_TOC, "w", encoding="utf-8") as f:
+                f.write(yaml.dump(content, allow_unicode=True))
+        else:
+            raise ValueError(
+                "The model doc part of the table of contents is not properly sorted, run `make style` to fix this."
+            )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
+    args = parser.parse_args()
+
+    check_scheduler_doc(args.fix_and_overwrite)
+    check_pipeline_doc(args.fix_and_overwrite)
+    check_model_doc(args.fix_and_overwrite)
diff --git a/pythonProject/diffusers-main/utils/check_dummies.py b/pythonProject/diffusers-main/utils/check_dummies.py
new file mode 100644
index 0000000000000000000000000000000000000000..04a670c2f5d91c76b32b2c6d200b342675df6bb0
--- /dev/null
+++ b/pythonProject/diffusers-main/utils/check_dummies.py
@@ -0,0 +1,175 @@
+# coding=utf-8
+# Copyright 2025 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import re
+
+
+# All paths are set with the intent that you should run this script from the root of the repo with the command
+# python utils/check_dummies.py
+PATH_TO_DIFFUSERS = "src/diffusers"
+
+# Matches is_xxx_available()
+_re_backend = re.compile(r"is\_([a-z_]*)_available\(\)")
+# Matches from xxx import bla
+_re_single_line_import = re.compile(r"\s+from\s+\S*\s+import\s+([^\(\s].*)\n")
+
+
+DUMMY_CONSTANT = """
+{0} = None
+"""
+
+DUMMY_CLASS = """
+class {0}(metaclass=DummyObject):
+    _backends = {1}
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, {1})
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, {1})
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, {1})
+"""
+
+
+DUMMY_FUNCTION = """
+def {0}(*args, **kwargs):
+    requires_backends({0}, {1})
+"""
+
+
+def find_backend(line):
+    """Find one (or multiple) backends in a code line of the init."""
+    backends = _re_backend.findall(line)
+    if len(backends) == 0:
+        return None
+
+    return "_and_".join(backends)
+
+
+def read_init():
+    """Read the init and extract the objects that are gated behind a backend check."""
+    with open(os.path.join(PATH_TO_DIFFUSERS, "__init__.py"), "r", encoding="utf-8", newline="\n") as f:
+        lines = f.readlines()
+
+    # Get to the point where we do the actual imports for type checking
+    line_index = 0
+    while not lines[line_index].startswith("if TYPE_CHECKING"):
+        line_index += 1
+
+    backend_specific_objects = {}
+    # Go through to the end of the file
+    while line_index < len(lines):
+        # If the line contains is_backend_available, we grab all objects associated with the `else` block
+        backend = find_backend(lines[line_index])
+        if backend is not None:
+            while not lines[line_index].startswith("    else:"):
+                line_index += 1
+            line_index += 1
+            objects = []
+            # Until we unindent, add backend objects to the list
+            while len(lines[line_index]) <= 1 or lines[line_index].startswith(" " * 8):
+                line = lines[line_index]
+                single_line_import_search = _re_single_line_import.search(line)
+                if single_line_import_search is not None:
+                    objects.extend(single_line_import_search.groups()[0].split(", "))
+                elif line.startswith(" " * 12):
+                    objects.append(line[12:-2])
+                line_index += 1
+
+            if len(objects) > 0:
+                backend_specific_objects[backend] = objects
+        else:
+            line_index += 1
+
+    return backend_specific_objects
+
+
+def create_dummy_object(name, backend_name):
+    """Create the code for the dummy object corresponding to `name`."""
+    if name.isupper():
+        return DUMMY_CONSTANT.format(name)
+    elif name.islower():
+        return DUMMY_FUNCTION.format(name, backend_name)
+    else:
+        return DUMMY_CLASS.format(name, backend_name)
+
+
+def create_dummy_files(backend_specific_objects=None):
+    """Create the content of the dummy files."""
+    if backend_specific_objects is None:
+        backend_specific_objects = read_init()
+    # One dummy file is generated per backend (or backend combination)
+    dummy_files = {}
+
+    for backend, objects in backend_specific_objects.items():
+        backend_name = "[" + ", ".join(f'"{b}"' for b in backend.split("_and_")) + "]"
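+        # Illustrative note (not in the original file): the backend "torch_and_transformers" gives
+        # backend_name '["torch", "transformers"]', the list passed on to `requires_backends`.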
".join(f'"{b}"' for b in backend.split("_and_")) + "]" + dummy_file = "# This file is autogenerated by the command `make fix-copies`, do not edit.\n" + dummy_file += "from ..utils import DummyObject, requires_backends\n\n" + dummy_file += "\n".join([create_dummy_object(o, backend_name) for o in objects]) + dummy_files[backend] = dummy_file + + return dummy_files + + +def check_dummies(overwrite=False): + """Check if the dummy files are up to date and maybe `overwrite` with the right content.""" + dummy_files = create_dummy_files() + # For special correspondence backend to shortcut as used in utils/dummy_xxx_objects.py + short_names = {"torch": "pt"} + + # Locate actual dummy modules and read their content. + path = os.path.join(PATH_TO_DIFFUSERS, "utils") + dummy_file_paths = { + backend: os.path.join(path, f"dummy_{short_names.get(backend, backend)}_objects.py") + for backend in dummy_files.keys() + } + + actual_dummies = {} + for backend, file_path in dummy_file_paths.items(): + if os.path.isfile(file_path): + with open(file_path, "r", encoding="utf-8", newline="\n") as f: + actual_dummies[backend] = f.read() + else: + actual_dummies[backend] = "" + + for backend in dummy_files.keys(): + if dummy_files[backend] != actual_dummies[backend]: + if overwrite: + print( + f"Updating diffusers.utils.dummy_{short_names.get(backend, backend)}_objects.py as the main " + "__init__ has new objects." + ) + with open(dummy_file_paths[backend], "w", encoding="utf-8", newline="\n") as f: + f.write(dummy_files[backend]) + else: + raise ValueError( + "The main __init__ has objects that are not present in " + f"diffusers.utils.dummy_{short_names.get(backend, backend)}_objects.py. Run `make fix-copies` " + "to fix this." + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.") + args = parser.parse_args() + + check_dummies(args.fix_and_overwrite)