Commit 30df75d · Your Name committed · 1 Parent(s): c3ba03f
Files changed (4)
  1. README.md +1 -0
  2. mapping_t5.json +0 -1
  3. src/pipeline.py +44 -76
  4. transformer_int8.json +0 -0
README.md ADDED
@@ -0,0 +1 @@
+ # FLUX OPT
mapping_t5.json DELETED
@@ -1 +0,0 @@
- {"encoder.block.0.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.0.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.0.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.0.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.0.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.0.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.0.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.1.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.1.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.1.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.1.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.1.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.1.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.1.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.2.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.2.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.2.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.2.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.2.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.2.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.2.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.3.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.3.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.3.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.3.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.3.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.3.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.3.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.4.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.4.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.4.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.4.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.4.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.4.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.4.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.5.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.5.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.5.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.5.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.5.layer.1.DenseReluDense.wi_0": {"weights": "qint8", 
"activations": "none"}, "encoder.block.5.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.5.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.6.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.6.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.6.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.6.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.6.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.6.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.6.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.7.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.7.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.7.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.7.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.7.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.7.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.7.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.8.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.8.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.8.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.8.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.8.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.8.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.8.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.9.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.9.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.9.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.9.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.9.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.9.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.9.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.10.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.10.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.10.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.10.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.10.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.10.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.10.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.11.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.11.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, 
"encoder.block.11.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.11.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.11.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.11.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.11.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.12.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.12.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.12.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.12.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.12.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.12.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.12.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.13.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.13.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.13.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.13.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.13.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.13.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.13.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.14.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.14.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.14.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.14.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.14.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.14.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.14.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.15.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.15.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.15.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.15.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.15.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.15.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.15.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.16.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.16.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.16.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.16.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.16.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.16.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, 
"encoder.block.16.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.17.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.17.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.17.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.17.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.17.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.17.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.17.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.18.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.18.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.18.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.18.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.18.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.18.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.18.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.19.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.19.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.19.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.19.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.19.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.19.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.19.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.20.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.20.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.20.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.20.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.20.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.20.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.20.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.21.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.21.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.21.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.21.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.21.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.21.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.21.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.22.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.22.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.22.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, 
"encoder.block.22.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.22.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.22.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.22.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}, "encoder.block.23.layer.0.SelfAttention.q": {"weights": "qint8", "activations": "none"}, "encoder.block.23.layer.0.SelfAttention.k": {"weights": "qint8", "activations": "none"}, "encoder.block.23.layer.0.SelfAttention.v": {"weights": "qint8", "activations": "none"}, "encoder.block.23.layer.0.SelfAttention.o": {"weights": "qint8", "activations": "none"}, "encoder.block.23.layer.1.DenseReluDense.wi_0": {"weights": "qint8", "activations": "none"}, "encoder.block.23.layer.1.DenseReluDense.wi_1": {"weights": "qint8", "activations": "none"}, "encoder.block.23.layer.1.DenseReluDense.wo": {"weights": "qint8", "activations": "none"}}
 
 
src/pipeline.py CHANGED
@@ -1,34 +1,37 @@
- # FLux Optimization Pipeline
+ # Quanto optimization, unique
  import os
  import torch
  import torch._dynamo
  import gc
-
+ import json
+ import transformers

  from huggingface_hub.constants import HF_HUB_CACHE
- from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
-
+ from transformers import T5EncoderModel
+ import diffusers
  from torchao.quantization import quantize_, int8_weight_only, fpx_weight_only
  from torch import Generator
  from diffusers import FluxTransformer2DModel, DiffusionPipeline

  from PIL.Image import Image
- from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
+ from diffusers import AutoencoderTiny
  from pipelines.models import TextToImageRequest
- from optimum.quanto import requantize
- import json
- import transformers
+ from optimum.quanto import requantize as optimum_quant

+ try:
+     from huggingface_hub import hf_hub_download
+ except:
+     pass


  torch._dynamo.config.suppress_errors = True
  os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
  os.environ["TOKENIZERS_PARALLELISM"] = "True"

- CHECKPOINT = "black-forest-labs/FLUX.1-schnell"
- REVISION = "741f7c3ce8b383c54771c7003378a50191e9efe9"
+ ckpt_main = "black-forest-labs/FLUX.1-schnell"
+ revision_main = "741f7c3ce8b383c54771c7003378a50191e9efe9"
  Pipeline = None
- apply_quanto=1
+ apply_transformer_tag = 1

  import torch
  import gc
@@ -36,88 +39,52 @@ import os
  import json
  import transformers

- def perform_memory_maintenance():
-     """A convoluted way of handling memory management for CUDA."""
-     [fn() for fn in [
-         torch.cuda.empty_cache,
-         torch.cuda.reset_max_memory_allocated,
-         torch.cuda.reset_peak_memory_stats,
-         gc.collect
-     ]]
-
- def obscurely_load_encoder(repo_path):
-     """
-     Loads a T5 encoder with multiple layers of abstraction and complexity.
-
-     Args:
-         repo_path (str): The cryptic location of the repository files.
-
-     Returns:
-         An enigmatic, quantized T5 encoder model.
-     """
-     # Hidden mechanism to load JSON data
-     def load_json(file_path):
-         with open(file_path, "r") as f:
-             return json.load(f)
-
-     # Fetch quantization map
-     quant_map = load_json("mapping_t5.json")
-
-     # Acquire the mysterious T5 configuration
-     t5_config = transformers.T5Config(**load_json(os.path.join(repo_path, "config.json")))
-
-     # Cloak the model instantiation in an unfamiliar syntax
-     device_context = torch.device("cuda")
-     encoder = transformers.T5EncoderModel(t5_config).to(torch.bfloat16) if device_context.type == "meta" else None
-
-     # A vacuous state_dict waiting for purpose
-     model_weights = None
-
-     # Perform the shadowy act of quantization
-     requantize(
-         model=encoder,
-         state_dict=model_weights,
-         quantization_map=quant_map,
-         device=torch.device("cuda")
-     )
-     return encoder

- def load_pipeline() -> Pipeline:
+ def convert_transformer_to_int8(repo_path):
+     with open("transformer_int8.json", "r") as f:
+         quantization_map = json.load(f)
+     with torch.device("meta"):
+         transformer_config_path = os.path.join(repo_path, "config.json")
+         transformer = diffusers.FluxTransformer2DModel.from_config(transformer_config_path).to(torch.bfloat16)

-     try:
-         origin_t5_path = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_T5_bf16/snapshots/63a3d9ef7b586655600ac9bd4e4747d038237761")
-         text_encoder_2 = obscurely_load_encoder(_path=origin_t5_path)
-     except:
-         text_encoder_2 = T5EncoderModel.from_pretrained("RichardWilliam/XULF_T5_bf16",
-                                                         revision = "63a3d9ef7b586655600ac9bd4e4747d038237761",
-                                                         torch_dtype=torch.bfloat16).to(memory_format=torch.channels_last)
+     state_dict = hf_hub_download(repo_path, "diffusion_pytorch_models.safetensors")
+
+     optimum_quant(transformer, state_dict, quantization_map, device=torch.device("cuda"))
+     return transformer

-     origin_vae = AutoencoderTiny.from_pretrained("RichardWilliam/XULF_Vae",
+
+ def load_pipeline() -> Pipeline:
+
+     original_vae = AutoencoderTiny.from_pretrained("RichardWilliam/XULF_Vae",
                                                    revision="3ee225c539465c27adadec45c6e8af50a7397b7d",
                                                    torch_dtype=torch.bfloat16)
+

+     text_encoder_2 = T5EncoderModel.from_pretrained("RichardWilliam/XULF_T5_bf16",
+                                                     revision = "63a3d9ef7b586655600ac9bd4e4747d038237761",
+                                                     torch_dtype=torch.bfloat16).to(memory_format=torch.channels_last)

      trans_path = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_Transfomer/snapshots/6860c51af40329808f270e159a0d018559a1204f")
-     origin_trans = FluxTransformer2DModel.from_pretrained(trans_path,
+     pre_quanted_trans = FluxTransformer2DModel.from_pretrained(trans_path,
                                                            torch_dtype=torch.bfloat16,
                                                            use_safetensors=False).to(memory_format=torch.channels_last)
-     transformer = origin_trans
+     transformer = pre_quanted_trans

-     pipeline = DiffusionPipeline.from_pretrained(CHECKPOINT,
-                                                  revision=REVISION,
-                                                  vae=origin_vae,
+     pipeline = DiffusionPipeline.from_pretrained(ckpt_main,
+                                                  revision=revision_main,
+                                                  vae=original_vae,
                                                   transformer=transformer,
                                                   text_encoder_2=text_encoder_2,
                                                   torch_dtype=torch.bfloat16)
      pipeline.to("cuda")
      try:
-         # pipeline.enable_sequential_cpu_offload()
-         pipeline.vae.enable_slicing()
+         pipeline.enable_int8()
+         pipeline.transformer = convert_transformer_to_int8(trans_path)
      except:
-         pass
+         print("Use origin pipeline")

-     for __ in range(3):
-         pipeline(prompt="sweet, subordinative, gender, mormyre, arteriolosclerosis, positivism, Antiochianism, palmerite",
+     for warm_up_prompt in range(3):
+         pipeline(prompt="puffer, cutie, buttinsky, prototrophic, betulinamaric, quintet, tunesome, decaspermous",
                   width=1024,
                   height=1024,
                   guidance_scale=0.0,
@@ -128,7 +95,8 @@ def load_pipeline() -> Pipeline:
  @torch.no_grad()
  def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:

-     perform_memory_maintenance()
+     gc.collect()
+     torch.cuda.empty_cache()

      generator = Generator(pipeline.device).manual_seed(request.seed)

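The new convert_transformer_to_int8() leans on optimum.quanto's requantize() recipe: build the module skeleton under torch.device("meta") so no real weights are allocated, then have requantize() materialize int8 weights from a state dict according to the quantization map. As committed it can only take effect via the fallback: transformer_int8.json is added empty, so json.load() raises, and requantize() expects an in-memory state dict rather than the file path hf_hub_download() returns, so load_pipeline() prints "Use origin pipeline" and keeps the pre-quantized transformer. A minimal sketch of the working pattern; REPO_ID and FILENAME are placeholders (assumptions, not this commit's values):

    # Sketch of the meta-device + requantize pattern that
    # convert_transformer_to_int8() is built around. REPO_ID and FILENAME are
    # placeholders for wherever the bf16 weights live, not this commit's values.
    import json
    import torch
    import diffusers
    from huggingface_hub import hf_hub_download
    from safetensors.torch import load_file
    from optimum.quanto import requantize

    REPO_ID = "your-namespace/flux-transformer-bf16"    # assumption
    FILENAME = "diffusion_pytorch_model.safetensors"    # assumption

    with open("transformer_int8.json") as f:
        qmap = json.load(f)

    with torch.device("meta"):  # build the module tree without allocating weights
        config = diffusers.FluxTransformer2DModel.load_config(REPO_ID)
        transformer = diffusers.FluxTransformer2DModel.from_config(config).to(torch.bfloat16)

    state_dict = load_file(hf_hub_download(REPO_ID, FILENAME))  # a dict, not a path

    requantize(transformer, state_dict, qmap, device=torch.device("cuda"))

Note that diffusers' from_config() takes a config dict, not a path, which is why the sketch loads the config with load_config() first.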
transformer_int8.json ADDED
File without changes
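
Both the old and new pipeline.py import torchao's weight-only quantization helpers (quantize_, int8_weight_only, fpx_weight_only) without calling them in the hunks shown. For reference, a minimal sketch of that API; pointing it at the schnell transformer is an assumption for illustration, not something this commit does:

    # Sketch of torchao weight-only int8 quantization; applying it to the FLUX
    # transformer is an assumption, not something this commit does.
    import torch
    from diffusers import FluxTransformer2DModel
    from torchao.quantization import quantize_, int8_weight_only

    transformer = FluxTransformer2DModel.from_pretrained(
        "black-forest-labs/FLUX.1-schnell", subfolder="transformer",
        torch_dtype=torch.bfloat16)
    quantize_(transformer, int8_weight_only())  # swaps Linear weights to int8 in place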