YOURNAME committed on
Commit
c864951
·
1 Parent(s): e3a3cdd
Files changed (2) hide show
  1. pyproject.toml +3 -6
  2. src/pipeline.py +34 -35
pyproject.toml CHANGED
@@ -23,12 +23,9 @@ dependencies = [
23
  ]
24
 
25
  [[tool.edge-maxxing.models]]
26
- repository = "black-forest-labs/FLUX.1-schnell"
27
- revision = "741f7c3ce8b383c54771c7003378a50191e9efe9"
28
-
29
- [[tool.edge-maxxing.models]]
30
- repository = "RichardWilliam/XULF_T5_bf16"
31
- revision = "63a3d9ef7b586655600ac9bd4e4747d038237761"
32
 
33
  [[tool.edge-maxxing.models]]
34
  repository = "RichardWilliam/XULF_Vae"
 
23
  ]
24
 
25
  [[tool.edge-maxxing.models]]
26
+ repository = "RichardWilliam/FullyFLUXSCH"
27
+ revision = "c5f4f70c6cb9228a9c258799aadc660dde417af6"
28
+ exclude = ["transformer"]
 
 
 
29
 
30
  [[tool.edge-maxxing.models]]
31
  repository = "RichardWilliam/XULF_Vae"
src/pipeline.py CHANGED
@@ -1,9 +1,14 @@
1
- # asfsdgdvsdgtwtgfgfsgsgdsvxcvsgsg
2
  import os
3
  import torch
4
  import torch._dynamo
5
  import gc
6
 
 
 
 
 
 
7
  from huggingface_hub.constants import HF_HUB_CACHE
8
  from torch import Generator
9
  from diffusers import FluxTransformer2DModel, DiffusionPipeline
@@ -14,24 +19,36 @@ from pipelines.models import TextToImageRequest
14
  from optimum.quanto import requantize
15
  import json
16
  import transformers
17
- # ITs meeeeeeeeeeeeeeeeeeeeeeeeeeeeee
 
18
 
19
 
20
  torch._dynamo.config.suppress_errors = True
21
  os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
22
  os.environ["TOKENIZERS_PARALLELISM"] = "True"
23
 
24
- CHECKPOINT = "black-forest-labs/FLUX.1-schnell"
25
- REVISION = "741f7c3ce8b383c54771c7003378a50191e9efe9"
26
  Pipeline = None
27
  apply_quanto=1
28
 
29
- def reset_cache():
30
  gc.collect()
31
  torch.cuda.empty_cache()
32
  torch.cuda.reset_max_memory_allocated()
33
  torch.cuda.reset_peak_memory_stats()
34
 
 
 
 
 
 
 
 
 
 
 
 
35
  def load_quanto_text_encoder_2(text_repo_path):
36
  with open("quantization_map.json", "r") as f:
37
  quantization_map = json.load(f)
@@ -43,49 +60,32 @@ def load_quanto_text_encoder_2(text_repo_path):
43
  requantize(text_encoder_2, state_dict, quantization_map, device=torch.device("cuda"))
44
  return text_encoder_2
45
 
46
- class TransformerDiffusionCompiler:
47
- def __init__(self, diffusion_pipeline, activate_optimization=False):
48
- self.pipeline = diffusion_pipeline
49
- self.optimization = activate_optimization
50
- if self.optimization:
51
- self._enhance_model()
52
-
53
- def _enhance_model(self):
54
- print("Applying advanced optimizations to the transformer pipeline...")
55
- self.pipeline.unet = torch.compile(self.pipeline.unet, mode="reduce-overhead")
56
-
57
- def execute(self, *params, **kwargs):
58
- return self.pipeline(*params, **kwargs)
59
 
60
  def load_pipeline() -> Pipeline:
61
 
62
- origin_vae = AutoencoderTiny.from_pretrained("RichardWilliam/XULF_Vae",
63
- revision="3ee225c539465c27adadec45c6e8af50a7397b7d",
64
- torch_dtype=torch.bfloat16)
65
-
66
-
67
  main_path = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_Transfomer/snapshots/6860c51af40329808f270e159a0d018559a1204f")
68
  origin_trans = FluxTransformer2DModel.from_pretrained(main_path,
69
  torch_dtype=torch.bfloat16,
70
  use_safetensors=False).to(memory_format=torch.channels_last)
 
71
  transformer = origin_trans
72
 
73
- pipeline = DiffusionPipeline.from_pretrained(CHECKPOINT,
74
- revision=REVISION,
75
  transformer=transformer,
76
  torch_dtype=torch.bfloat16)
77
  pipeline.to("cuda")
78
 
79
- try:
80
- # pipeline.text_encoder_v2 = load_quanto_text_encoder_2(text_repo_path=None)
81
- pipeline.enable_cuda_graph(type="max-autotune")
82
- pipeline.text_encoder_v2 = load_quanto_text_encoder_2(text_repo_path=None)
83
- pipeline = TransformerDiffusionCompiler(pipeline, activate_optimization=True)
84
- except:
85
- print("Something wrong here")
86
 
87
  for __ in range(3):
88
- pipeline(prompt="whensoever, lodger, Locarnize, hippology, harakeke",
89
  width=1024,
90
  height=1024,
91
  guidance_scale=0.0,
@@ -96,8 +96,7 @@ def load_pipeline() -> Pipeline:
96
  @torch.no_grad()
97
  def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
98
 
99
- reset_cache()
100
- torch.cuda.empty_cache()
101
 
102
  generator = Generator(pipeline.device).manual_seed(request.seed)
103
 
 
1
+ # eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
2
  import os
3
  import torch
4
  import torch._dynamo
5
  import gc
6
 
7
+ torch._dynamo.config.suppress_errors = True
8
+ os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
9
+ os.environ["TOKENIZERS_PARALLELISM"] = "True"
10
+
11
+
12
  from huggingface_hub.constants import HF_HUB_CACHE
13
  from torch import Generator
14
  from diffusers import FluxTransformer2DModel, DiffusionPipeline
 
19
  from optimum.quanto import requantize
20
  import json
21
  import transformers
22
+ from functools import wraps
23
+
24
 
25
 
26
  torch._dynamo.config.suppress_errors = True
27
  os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
28
  os.environ["TOKENIZERS_PARALLELISM"] = "True"
29
 
30
+ MAIN_ID = "RichardWilliam/FullyFLUXSCH"
31
+ REV = "c5f4f70c6cb9228a9c258799aadc660dde417af6"
32
  Pipeline = None
33
  apply_quanto=1
34
 
35
def to_hell():
    """Free cached memory and reset CUDA peak-memory statistics.

    Runs a Python garbage-collection pass first so that unreachable objects
    are dropped before the CUDA caching allocator is asked to release its
    unoccupied cached blocks, then resets the peak-memory counters.

    When CUDA is not available only the garbage-collection pass runs, so the
    function is safe to call on a CPU-only host.

    Returns:
        None.
    """
    gc.collect()

    # Nothing CUDA-related to clean up (and the stats call would raise)
    # when no CUDA device is present.
    if not torch.cuda.is_available():
        return

    torch.cuda.empty_cache()
    # reset_peak_memory_stats() already resets every peak counter; the
    # deprecated reset_max_memory_allocated() merely forwarded to it and
    # emitted a warning, so it is no longer called here.
    torch.cuda.reset_peak_memory_stats()
40
 
41
def error_handler(func):
    """Decorate ``func`` so ordinary exceptions are reported instead of raised.

    The returned wrapper forwards all positional and keyword arguments to
    ``func`` and returns its result unchanged. If ``func`` raises an
    ``Exception`` subclass, a one-line ``Error in <name>: <message>`` summary
    is printed to stdout, the full traceback is written to stderr, and
    ``None`` is returned. Callers must therefore treat ``None`` as the
    failure signal. Exceptions that do not derive from ``Exception`` (such as
    ``KeyboardInterrupt``) are not intercepted.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature as ``func``.
    """
    import traceback  # local import keeps the block self-contained

    # Resolve the display name once, with a fallback: callables such as
    # functools.partial objects have no __name__, and looking it up inside
    # the except path would raise a second, unrelated error.
    display_name = getattr(func, "__name__", repr(func))

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"Error in {display_name}: {e}")
            # Keep the traceback; the one-line summary alone is often not
            # enough to locate the failure.
            traceback.print_exc()
            return None

    return wrapper
50
+
51
+ @error_handler
52
  def load_quanto_text_encoder_2(text_repo_path):
53
  with open("quantization_map.json", "r") as f:
54
  quantization_map = json.load(f)
 
60
  requantize(text_encoder_2, state_dict, quantization_map, device=torch.device("cuda"))
61
  return text_encoder_2
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def load_pipeline() -> Pipeline:
65
 
 
 
 
 
 
66
  main_path = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_Transfomer/snapshots/6860c51af40329808f270e159a0d018559a1204f")
67
  origin_trans = FluxTransformer2DModel.from_pretrained(main_path,
68
  torch_dtype=torch.bfloat16,
69
  use_safetensors=False).to(memory_format=torch.channels_last)
70
+
71
  transformer = origin_trans
72
 
73
+ pipeline = DiffusionPipeline.from_pretrained(MAIN_ID,
74
+ revision=REV,
75
  transformer=transformer,
76
  torch_dtype=torch.bfloat16)
77
  pipeline.to("cuda")
78
 
79
+
80
+ text_encoder_v2 = load_quanto_text_encoder_2(text_repo_path=None)
81
+
82
+ if text_encoder_v2==None:
83
+ print("Something wrong")
84
+ else:
85
+ pipeline.text_encoder_2 = text_encoder_v2
86
 
87
  for __ in range(3):
88
+ pipeline(prompt="I am the worst",
89
  width=1024,
90
  height=1024,
91
  guidance_scale=0.0,
 
96
  @torch.no_grad()
97
  def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
98
 
99
+ to_hell()
 
100
 
101
  generator = Generator(pipeline.device).manual_seed(request.seed)
102