John6666 committed on
Commit
2393f58
·
verified ·
1 Parent(s): 99e8fa1

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +4 -3
handler.py CHANGED
@@ -14,10 +14,8 @@ if IS_COMPILE:
14
  #from huggingface_inference_toolkit.logging import logger
15
 
16
  def compile_pipeline(pipe) -> Any:
17
- pipe.transformer.fuse_qkv_projections()
18
  pipe.transformer.to(memory_format=torch.channels_last)
19
  pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
20
- pipe.vae.fuse_qkv_projections()
21
  pipe.vae.to(memory_format=torch.channels_last)
22
  pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
23
  return pipe
@@ -27,10 +25,12 @@ class EndpointHandler:
27
  repo_id = "camenduru/FLUX.1-dev-diffusers"
28
  #repo_id = "NoMoreCopyright/FLUX.1-dev-test"
29
  dtype = torch.bfloat16
30
- quantization_config = TorchAoConfig("int8wo")
31
  vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
32
  #transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
33
  self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
 
 
34
  if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
35
  self.pipeline.to("cuda")
36
 
@@ -68,3 +68,4 @@ class EndpointHandler:
68
  output_type="pil",
69
  ).images[0]
70
 
 
 
14
  #from huggingface_inference_toolkit.logging import logger
15
 
16
  def compile_pipeline(pipe) -> Any:
 
17
  pipe.transformer.to(memory_format=torch.channels_last)
18
  pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
 
19
  pipe.vae.to(memory_format=torch.channels_last)
20
  pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
21
  return pipe
 
25
  repo_id = "camenduru/FLUX.1-dev-diffusers"
26
  #repo_id = "NoMoreCopyright/FLUX.1-dev-test"
27
  dtype = torch.bfloat16
28
+ quantization_config = TorchAoConfig("int8dq")
29
  vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
30
  #transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
31
  self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
32
+ self.pipeline.transformer.fuse_qkv_projections()
33
+ self.pipeline.vae.fuse_qkv_projections()
34
  if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
35
  self.pipeline.to("cuda")
36
 
 
68
  output_type="pil",
69
  ).images[0]
70
 
71
+