manbeast3b committed
Commit a7ab256 · verified · 1 Parent(s): bb30052

Update src/pipeline.py

Files changed (1)
  1. src/pipeline.py +16 -14
src/pipeline.py CHANGED
@@ -1,8 +1,5 @@
-from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
-from diffusers.image_processor import VaeImageProcessor
-from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
-
-from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
+from diffusers import AutoencoderTiny
+from transformers import T5EncoderModel
 import torch
 import torch._dynamo
 import gc
@@ -12,7 +9,7 @@ from pipelines.models import TextToImageRequest
 from torch import Generator
 import time
 from diffusers import FluxTransformer2DModel, DiffusionPipeline
-from torchao.quantization import quantize_,int8_weight_only
+from torchao.quantization import quantize_, int8_weight_only
 import os
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:False,garbage_collection_threshold:0.01"
 
@@ -29,35 +26,40 @@ def load_pipeline() -> Pipeline:
     clear()
 
     text_encoder_2 = T5EncoderModel.from_pretrained(
-        "city96/t5-v1_1-xxl-encoder-bf16", torch_dtype=torch.bfloat16
+        "city96/t5-v1_1-xxl-encoder-bf16", torch_dtype=DTYPE
     )
-    vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=dtype)
+
+    vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=DTYPE)
+    quantize_(vae, int8_weight_only())
     pipeline = DiffusionPipeline.from_pretrained(
-        ckpt_id,
+        MODEL_ID,
         vae=vae,
         text_encoder_2=text_encoder_2,
-        torch_dtype=dtype,
+        torch_dtype=DTYPE,
     )
     torch.backends.cudnn.benchmark = True
     torch.backends.cuda.matmul.allow_tf32 = True
     torch.cuda.set_per_process_memory_fraction(0.99)
     pipeline.text_encoder.to(memory_format=torch.channels_last)
     pipeline.transformer.to(memory_format=torch.channels_last)
-
-
     pipeline.vae.to(memory_format=torch.channels_last)
     pipeline.vae = torch.compile(pipeline.vae)
 
     pipeline._exclude_from_cpu_offload = ["vae"]
     pipeline.enable_sequential_cpu_offload()
-    for _ in range(2):
+    for _ in range(1):
         pipeline(prompt="onomancy, aftergo, spirantic, Platyhelmia, modificator, drupaceous, jobbernowl, hereness", width=1024, height=1024, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256)
-
+        pipeline(prompt="", width=1024, height=1024, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256)
     return pipeline
 
 
+sample = True
 @torch.inference_mode()
 def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
+    global sample
+    if sample:
+        clear()
+        sample = None
     torch.cuda.reset_peak_memory_stats()
     generator = Generator("cuda").manual_seed(request.seed)
     image = pipeline(request.prompt, generator=generator, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256, height=request.height, width=request.width, output_type="pil").images[0]
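
For context on the functional core of this change: the full FLUX `AutoencoderKL` is replaced by the distilled `AutoencoderTiny` checkpoint `madebyollin/taef1`, which is then quantized in place with torchao's int8 weight-only scheme. A minimal standalone sketch of that pattern, assuming `diffusers` and `torchao` are installed (bf16 here stands in for the `DTYPE` constant defined elsewhere in pipeline.py):

import torch
from diffusers import AutoencoderTiny
from torchao.quantization import quantize_, int8_weight_only

# Distilled "tiny" FLUX autoencoder; far smaller than the full AutoencoderKL.
vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=torch.bfloat16)

# quantize_ mutates the module in place; int8_weight_only() swaps eligible
# layer weights (nn.Linear by default) for int8 weight-only quantized tensors.
quantize_(vae, int8_weight_only())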
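The warmup loop also changes: one pass instead of two, with a second empty-prompt generation added inside it. Since `pipeline.vae` is wrapped in `torch.compile`, which compiles lazily on first use, these warmup calls absorb the one-time compilation cost before `infer` serves a real request. A hypothetical minimal illustration of the lazy-compile-plus-warmup pattern:

import torch

# torch.compile returns a wrapper immediately; graph capture and codegen
# only run on the first call with real inputs.
model = torch.compile(torch.nn.Linear(64, 64).cuda())

with torch.inference_mode():
    # Warmup call: pays the compile cost up front so later requests
    # see only steady-state latency.
    model(torch.randn(1, 64, device="cuda"))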
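Finally, the new module-level `sample` flag turns the first `infer` call into a one-shot cleanup point: `clear()` runs once, then the flag is set to `None` so later requests skip it. The same pattern in isolation, with hypothetical names (the diff does not show `clear()`, which presumably wraps garbage collection and CUDA cache release):

import gc
import torch

_first_call = True  # analogue of the diff's module-level `sample` flag

def handler():
    global _first_call
    if _first_call:
        gc.collect()              # one-time cleanup on the first request only;
        torch.cuda.empty_cache()  # assumed to approximate what clear() does
        _first_call = False
    # ... per-request inference would follow here ...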