Charlyki committed on
Commit
c6da526
·
verified ·
1 Parent(s): c6b48ad

Initial commit with folder contents

Browse files
Files changed (1) hide show
  1. src/pipeline.py +20 -22
src/pipeline.py CHANGED
@@ -1,8 +1,11 @@
1
- from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
2
- from diffusers.image_processor import VaeImageProcessor
3
- from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
4
- from huggingface_hub.constants import HF_HUB_CACHE
5
- from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
 
 
 
6
  import torch
7
  import torch._dynamo
8
  import gc
@@ -14,16 +17,11 @@ import time
14
  from diffusers import DiffusionPipeline
15
  from torchao.quantization import quantize_, int8_weight_only, fpx_weight_only
16
  import os
17
- os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
18
-
19
- import torch
20
- import math
21
- from typing import Type, Dict, Any, Tuple, Callable, Optional, Union
22
- import ghanta
23
- import numpy as np
24
- import torch
25
- import torch.nn as nn
26
- import torch.nn.functional as F
27
 
28
  from diffusers.configuration_utils import ConfigMixin, register_to_config
29
  from diffusers.loaders import FromOriginalModelMixin, PeftAdapterMixin
@@ -41,6 +39,7 @@ from diffusers.utils.import_utils import is_torch_npu_available
41
  from diffusers.utils.torch_utils import maybe_allow_in_graph
42
  from diffusers.models.embeddings import CombinedTimestepGuidanceTextProjEmbeddings, CombinedTimestepTextProjEmbeddings, FluxPosEmbed
43
  from diffusers.models.modeling_outputs import Transformer2DModelOutput
 
44
 
45
  class BasicQuantization:
46
  def __init__(self, bits=1):
@@ -201,7 +200,8 @@ class FluxTransformerBlock(nn.Module):
201
  self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
202
  self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
203
 
204
- self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
 
205
  self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
206
  self._chunk_size = None
207
  self._chunk_dim = 0
@@ -433,6 +433,7 @@ class FluxTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOrig
433
  ) -> Union[torch.FloatTensor, Transformer2DModelOutput]:
434
  if joint_attention_kwargs is not None:
435
  joint_attention_kwargs = joint_attention_kwargs.copy()
 
436
  lora_scale = joint_attention_kwargs.pop("scale", 1.0)
437
  else:
438
  lora_scale = 1.0
@@ -577,8 +578,6 @@ torch.backends.cuda.matmul.allow_tf32 = True
577
  torch.backends.cudnn.enabled = True
578
  torch.backends.cudnn.benchmark = True
579
 
580
- # ckpt_id = "black-forest-labs/FLUX.1-schnell"
581
- # ckpt_revision = "741f7c3ce8b383c54771c7003378a50191e9efe9"
582
  ckpt_id = "Charlyki/extra0Laye0"
583
  ckpt_revision = "2c8c7664a42b9eaf96a979dd2d90910ddb0d1e42"
584
  def empty_cache():
@@ -602,11 +601,11 @@ def load_pipeline() -> Pipeline:
602
 
603
  path = os.path.join(HF_HUB_CACHE, "models--Charlyki--extra1Laye1/snapshots/e851ea424c0d88d420cf85e5adccd41d406d358f")
604
  generator = torch.Generator(device=device)
 
 
605
  model = FluxTransformer2DModel.from_pretrained(path, torch_dtype=dtype, use_safetensors=False, generator= generator).to(memory_format=torch.channels_last)
606
  torch.backends.cudnn.benchmark = True
607
  torch.backends.cudnn.deterministic = False
608
- # model = torch.compile(model, mode="max-autotune-no-cudagraphs")
609
- # model = torch.compile(model,backend="aot_eager")
610
  vae = torch.compile(vae)
611
  pipeline = DiffusionPipeline.from_pretrained(
612
  ckpt_id,
@@ -621,8 +620,7 @@ def load_pipeline() -> Pipeline:
621
  pipeline.text_encoder_2.requires_grad_(False)
622
  pipeline.text_encoder.requires_grad_(False)
623
 
624
- # pipeline.enable_sequential_cpu_offload(exclude=["transformer"])
625
-
626
  for _ in range(3):
627
  pipeline(prompt="blah blah waah waah oneshot oneshot gang gang", width=1024, height=1024, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256)
628
 
 
1
+ import torch
2
+ import math
3
+ from typing import Type, Dict, Any, Tuple, Callable, Optional, Union
4
+ import ghanta
5
+ import numpy as np
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
  import torch
10
  import torch._dynamo
11
  import gc
 
17
  from diffusers import DiffusionPipeline
18
  from torchao.quantization import quantize_, int8_weight_only, fpx_weight_only
19
  import os
20
+ from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
21
+ from diffusers.image_processor import VaeImageProcessor
22
+ from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
23
+ from huggingface_hub.constants import HF_HUB_CACHE
24
+ from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
 
 
 
 
 
25
 
26
  from diffusers.configuration_utils import ConfigMixin, register_to_config
27
  from diffusers.loaders import FromOriginalModelMixin, PeftAdapterMixin
 
39
  from diffusers.utils.torch_utils import maybe_allow_in_graph
40
  from diffusers.models.embeddings import CombinedTimestepGuidanceTextProjEmbeddings, CombinedTimestepTextProjEmbeddings, FluxPosEmbed
41
  from diffusers.models.modeling_outputs import Transformer2DModelOutput
42
+ os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
43
 
44
  class BasicQuantization:
45
  def __init__(self, bits=1):
 
200
  self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
201
  self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
202
 
203
+ # self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
204
+ self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5)
205
  self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
206
  self._chunk_size = None
207
  self._chunk_dim = 0
 
433
  ) -> Union[torch.FloatTensor, Transformer2DModelOutput]:
434
  if joint_attention_kwargs is not None:
435
  joint_attention_kwargs = joint_attention_kwargs.copy()
436
+ # lora_scale = joint_attention_kwargs.pop("scale", 2.0)
437
  lora_scale = joint_attention_kwargs.pop("scale", 1.0)
438
  else:
439
  lora_scale = 1.0
 
578
  torch.backends.cudnn.enabled = True
579
  torch.backends.cudnn.benchmark = True
580
 
 
 
581
  ckpt_id = "Charlyki/extra0Laye0"
582
  ckpt_revision = "2c8c7664a42b9eaf96a979dd2d90910ddb0d1e42"
583
  def empty_cache():
 
601
 
602
  path = os.path.join(HF_HUB_CACHE, "models--Charlyki--extra1Laye1/snapshots/e851ea424c0d88d420cf85e5adccd41d406d358f")
603
  generator = torch.Generator(device=device)
604
+ # model = torch.compile(model, mode="max-autotune-no-cudagraphs")
605
+ # model = torch.compile(model,backend="aot_eager")
606
  model = FluxTransformer2DModel.from_pretrained(path, torch_dtype=dtype, use_safetensors=False, generator= generator).to(memory_format=torch.channels_last)
607
  torch.backends.cudnn.benchmark = True
608
  torch.backends.cudnn.deterministic = False
 
 
609
  vae = torch.compile(vae)
610
  pipeline = DiffusionPipeline.from_pretrained(
611
  ckpt_id,
 
620
  pipeline.text_encoder_2.requires_grad_(False)
621
  pipeline.text_encoder.requires_grad_(False)
622
 
623
+ # for _ in range(4):
 
624
  for _ in range(3):
625
  pipeline(prompt="blah blah waah waah oneshot oneshot gang gang", width=1024, height=1024, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256)
626