| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import sys |
| import unittest |
|
|
| from transformers import AutoTokenizer, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel |
|
|
| from diffusers import FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline |
| from diffusers.utils.testing_utils import is_peft_available, require_peft_backend, require_torch_gpu, torch_device |
|
|
|
|
# The PEFT availability check is evaluated here, but its branch is empty, so
# no PEFT-specific setup is performed in this module.
if is_peft_available():
    pass


# Put the current working directory on the module search path. Presumably this
# is what lets the local `utils` import that follows resolve — TODO confirm
# against how the test suite is invoked.
sys.path.append(".")
|
|
| from utils import PeftLoraLoaderMixinTests |
|
|
|
|
@require_peft_backend
class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
    """LoRA test configuration for ``StableDiffusion3Pipeline``.

    The class attributes describe a deliberately tiny pipeline (scheduler,
    transformer, VAE, three tokenizers and three text encoders). They are
    presumably read by ``PeftLoraLoaderMixinTests`` when it builds the dummy
    components used by the shared tests — confirm against the mixin.
    """

    # --- pipeline / scheduler -------------------------------------------
    pipeline_class = StableDiffusion3Pipeline
    scheduler_cls = FlowMatchEulerDiscreteScheduler
    scheduler_kwargs = {}
    scheduler_classes = [scheduler_cls]

    # --- denoiser (transformer) -----------------------------------------
    transformer_cls = SD3Transformer2DModel
    transformer_kwargs = {
        "sample_size": 32,
        "patch_size": 1,
        "in_channels": 4,
        "num_layers": 1,
        "attention_head_dim": 8,
        "num_attention_heads": 4,
        "caption_projection_dim": 32,
        "joint_attention_dim": 32,
        "pooled_projection_dim": 64,
        "out_channels": 4,
    }

    # --- VAE ------------------------------------------------------------
    vae_kwargs = {
        "sample_size": 32,
        "in_channels": 3,
        "out_channels": 3,
        "block_out_channels": (4,),
        "layers_per_block": 1,
        "latent_channels": 4,
        "norm_num_groups": 1,
        "use_quant_conv": False,
        "use_post_quant_conv": False,
        "shift_factor": 0.0609,
        "scaling_factor": 1.5035,
    }

    # --- tokenizers and text encoders (three of each) -------------------
    has_three_text_encoders = True

    tokenizer_cls = CLIPTokenizer
    tokenizer_id = "hf-internal-testing/tiny-random-clip"
    tokenizer_2_cls = CLIPTokenizer
    tokenizer_2_id = "hf-internal-testing/tiny-random-clip"
    tokenizer_3_cls = AutoTokenizer
    tokenizer_3_id = "hf-internal-testing/tiny-random-t5"

    text_encoder_cls = CLIPTextModelWithProjection
    text_encoder_id = "hf-internal-testing/tiny-sd3-text_encoder"
    text_encoder_2_cls = CLIPTextModelWithProjection
    text_encoder_2_id = "hf-internal-testing/tiny-sd3-text_encoder-2"
    text_encoder_3_cls = T5EncoderModel
    text_encoder_3_id = "hf-internal-testing/tiny-random-t5"

    @property
    def output_shape(self):
        """Return the fixed shape tuple ``(1, 32, 32, 3)``.

        Presumably the shape the shared mixin tests expect from the pipeline
        output — confirm against the mixin.
        """
        return (1, 32, 32, 3)

    @require_torch_gpu
    def test_sd3_lora(self):
        """
        Check that LoRA checkpoints stored in the diffusers format and in the
        peft format can both be loaded into the pipeline without raising.
        Related PR: https://github.com/huggingface/diffusers/pull/8584
        """
        # The first item returned by get_dummy_components() is unpacked as
        # the keyword arguments of the pipeline constructor.
        pipeline_components = self.get_dummy_components()[0]
        pipe = self.pipeline_class(**pipeline_components).to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        repo_id = "hf-internal-testing/tiny-sd3-loras"

        # diffusers-format checkpoint first; unload it before the next load.
        pipe.load_lora_weights(repo_id, weight_name="lora_diffusers_format.safetensors")
        pipe.unload_lora_weights()

        # peft-format checkpoint.
        pipe.load_lora_weights(repo_id, weight_name="lora_peft_format.safetensors")

    # The remaining methods are skipped unconditionally for SD3; the names
    # presumably override same-named tests inherited from the mixin.

    @unittest.skip("Not supported in SD3.")
    def test_simple_inference_with_text_denoiser_block_scale(self):
        pass

    @unittest.skip("Not supported in SD3.")
    def test_simple_inference_with_text_denoiser_multi_adapter_block_lora(self):
        pass

    @unittest.skip("Not supported in SD3.")
    def test_simple_inference_with_text_denoiser_block_scale_for_all_dict_options(self):
        pass

    @unittest.skip("Not supported in SD3.")
    def test_modify_padding_mode(self):
        pass
|
|