Spaces:
Runtime error
Runtime error
Update txt2panoimg/pipeline_sr.py
Browse files- txt2panoimg/pipeline_sr.py +1 -18
txt2panoimg/pipeline_sr.py
CHANGED
|
@@ -1,8 +1,3 @@
|
|
| 1 |
-
# Copyright © Alibaba, Inc. and its affiliates.
|
| 2 |
-
# The implementation here is modified based on diffusers.StableDiffusionControlNetImg2ImgPipeline,
|
| 3 |
-
# originally Apache 2.0 License and public available at
|
| 4 |
-
# https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
|
| 5 |
-
|
| 6 |
import copy
|
| 7 |
import re
|
| 8 |
from typing import Any, Callable, Dict, List, Optional, Union
|
|
@@ -53,7 +48,6 @@ EXAMPLE_DOC_STRING = """
|
|
| 53 |
... width=1536,
|
| 54 |
... control_image=image,
|
| 55 |
... ).images[0]
|
| 56 |
-
|
| 57 |
```
|
| 58 |
"""
|
| 59 |
|
|
@@ -141,7 +135,6 @@ def get_prompts_with_weights(pipe: DiffusionPipeline, prompt: List[str],
|
|
| 141 |
max_length: int):
|
| 142 |
r"""
|
| 143 |
Tokenize a list of prompts and return its tokens with weights of each token.
|
| 144 |
-
|
| 145 |
No padding, starting or ending token is included.
|
| 146 |
"""
|
| 147 |
tokens = []
|
|
@@ -265,9 +258,7 @@ def get_weighted_text_embeddings(
|
|
| 265 |
Prompts can be assigned with local weights using brackets. For example,
|
| 266 |
prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
|
| 267 |
and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
|
| 268 |
-
|
| 269 |
Also, to regularize the embedding, the weighted embedding would be scaled to preserve the original mean.
|
| 270 |
-
|
| 271 |
Args:
|
| 272 |
pipe (`DiffusionPipeline`):
|
| 273 |
Pipe to provide access to the tokenizer and the text encoder.
|
|
@@ -434,13 +425,10 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
| 434 |
StableDiffusionControlNetImg2ImgPipeline):
|
| 435 |
r"""
|
| 436 |
Pipeline for text-to-image generation using Stable Diffusion with ControlNet guidance.
|
| 437 |
-
|
| 438 |
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
|
| 439 |
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
|
| 440 |
-
|
| 441 |
In addition the pipeline inherits the following loading methods:
|
| 442 |
- *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
|
| 443 |
-
|
| 444 |
Args:
|
| 445 |
vae ([`AutoencoderKL`]):
|
| 446 |
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
|
|
@@ -610,7 +598,6 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
| 610 |
):
|
| 611 |
r"""
|
| 612 |
Encodes the prompt into text encoder hidden states.
|
| 613 |
-
|
| 614 |
Args:
|
| 615 |
prompt (`str` or `list(int)`):
|
| 616 |
prompt to be encoded
|
|
@@ -813,7 +800,6 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
| 813 |
):
|
| 814 |
r"""
|
| 815 |
Function invoked when calling the pipeline for generation.
|
| 816 |
-
|
| 817 |
Args:
|
| 818 |
prompt (`str` or `List[str]`, *optional*):
|
| 819 |
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
|
@@ -889,9 +875,7 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
| 889 |
you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
|
| 890 |
context_size ('int', *optional*, defaults to '768'):
|
| 891 |
tile size used when denoising the latents.
|
| 892 |
-
|
| 893 |
Examples:
|
| 894 |
-
|
| 895 |
Returns:
|
| 896 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
|
| 897 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
|
|
@@ -906,7 +890,6 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
| 906 |
return_dict: bool = True
|
| 907 |
) -> Union[DecoderOutput, torch.FloatTensor]:
|
| 908 |
r"""Decode a batch of images using a tiled decoder.
|
| 909 |
-
|
| 910 |
Args:
|
| 911 |
When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
|
| 912 |
steps. This is useful to keep memory use constant regardless of image size. The end result of tiled
|
|
@@ -1199,4 +1182,4 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
| 1199 |
return (image, has_nsfw_concept)
|
| 1200 |
|
| 1201 |
return StableDiffusionPipelineOutput(
|
| 1202 |
-
images=image, nsfw_content_detected=has_nsfw_concept)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import copy
|
| 2 |
import re
|
| 3 |
from typing import Any, Callable, Dict, List, Optional, Union
|
|
|
|
| 48 |
... width=1536,
|
| 49 |
... control_image=image,
|
| 50 |
... ).images[0]
|
|
|
|
| 51 |
```
|
| 52 |
"""
|
| 53 |
|
|
|
|
| 135 |
max_length: int):
|
| 136 |
r"""
|
| 137 |
Tokenize a list of prompts and return its tokens with weights of each token.
|
|
|
|
| 138 |
No padding, starting or ending token is included.
|
| 139 |
"""
|
| 140 |
tokens = []
|
|
|
|
| 258 |
Prompts can be assigned with local weights using brackets. For example,
|
| 259 |
prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
|
| 260 |
and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
|
|
|
|
| 261 |
Also, to regularize the embedding, the weighted embedding would be scaled to preserve the original mean.
|
|
|
|
| 262 |
Args:
|
| 263 |
pipe (`DiffusionPipeline`):
|
| 264 |
Pipe to provide access to the tokenizer and the text encoder.
|
|
|
|
| 425 |
StableDiffusionControlNetImg2ImgPipeline):
|
| 426 |
r"""
|
| 427 |
Pipeline for text-to-image generation using Stable Diffusion with ControlNet guidance.
|
|
|
|
| 428 |
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
|
| 429 |
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
|
|
|
|
| 430 |
In addition the pipeline inherits the following loading methods:
|
| 431 |
- *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
|
|
|
|
| 432 |
Args:
|
| 433 |
vae ([`AutoencoderKL`]):
|
| 434 |
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
|
|
|
|
| 598 |
):
|
| 599 |
r"""
|
| 600 |
Encodes the prompt into text encoder hidden states.
|
|
|
|
| 601 |
Args:
|
| 602 |
prompt (`str` or `list(int)`):
|
| 603 |
prompt to be encoded
|
|
|
|
| 800 |
):
|
| 801 |
r"""
|
| 802 |
Function invoked when calling the pipeline for generation.
|
|
|
|
| 803 |
Args:
|
| 804 |
prompt (`str` or `List[str]`, *optional*):
|
| 805 |
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
|
|
|
| 875 |
you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
|
| 876 |
context_size ('int', *optional*, defaults to '768'):
|
| 877 |
tile size used when denoising the latents.
|
|
|
|
| 878 |
Examples:
|
|
|
|
| 879 |
Returns:
|
| 880 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
|
| 881 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
|
|
|
|
| 890 |
return_dict: bool = True
|
| 891 |
) -> Union[DecoderOutput, torch.FloatTensor]:
|
| 892 |
r"""Decode a batch of images using a tiled decoder.
|
|
|
|
| 893 |
Args:
|
| 894 |
When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
|
| 895 |
steps. This is useful to keep memory use constant regardless of image size. The end result of tiled
|
|
|
|
| 1182 |
return (image, has_nsfw_concept)
|
| 1183 |
|
| 1184 |
return StableDiffusionPipelineOutput(
|
| 1185 |
+
images=image, nsfw_content_detected=has_nsfw_concept)
|