English
John6666 committed on
Commit
cc5f48e
·
verified ·
1 Parent(s): aaeadfb

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +4 -6
handler.py CHANGED
@@ -216,7 +216,6 @@ def load_pipeline_fast(repo_id: str, dtype: torch.dtype) -> Any:
216
  pipe.vae.fuse_qkv_projections()
217
  pipe.transformer.to(memory_format=torch.channels_last)
218
  pipe.vae.to(memory_format=torch.channels_last)
219
- apply_cache_on_pipe(pipe, residual_diff_threshold=0.12)
220
  if IS_QUANT and not IS_AUTOQ:
221
  quantize_(pipe.text_encoder, int8_dynamic_activation_int8_weight())
222
  quantize_(pipe.text_encoder_2, int8_dynamic_activation_int8_weight())
@@ -240,12 +239,11 @@ class EndpointHandler:
240
  elif IS_COMPILE: self.pipeline = load_pipeline_fast(repo_id, dtype)
241
  elif IS_LVRAM and IS_CC89: self.pipeline = load_pipeline_lowvram(repo_id, dtype)
242
  else: self.pipeline = load_pipeline_stable(repo_id, dtype)
243
- if IS_PARA: apply_cache_on_pipe(self.pipeline, residual_diff_threshold=0.12)
 
244
  self.pipeline.to("cuda")
245
- if not IS_COMPILE:
246
- self.pipeline.enable_vae_slicing()
247
- self.pipeline.enable_vae_tiling()
248
- else:
249
  print("Compiling pipeline...")
250
  self.pipeline.transformer = torch.compile(self.pipeline.transformer, mode="max-autotune-no-cudagraphs")
251
  self.pipeline.vae = torch.compile(self.pipeline.vae, mode="max-autotune-no-cudagraphs")
 
216
  pipe.vae.fuse_qkv_projections()
217
  pipe.transformer.to(memory_format=torch.channels_last)
218
  pipe.vae.to(memory_format=torch.channels_last)
 
219
  if IS_QUANT and not IS_AUTOQ:
220
  quantize_(pipe.text_encoder, int8_dynamic_activation_int8_weight())
221
  quantize_(pipe.text_encoder_2, int8_dynamic_activation_int8_weight())
 
239
  elif IS_COMPILE: self.pipeline = load_pipeline_fast(repo_id, dtype)
240
  elif IS_LVRAM and IS_CC89: self.pipeline = load_pipeline_lowvram(repo_id, dtype)
241
  else: self.pipeline = load_pipeline_stable(repo_id, dtype)
242
+ self.pipeline.enable_vae_slicing()
243
+ self.pipeline.enable_vae_tiling()
244
  self.pipeline.to("cuda")
245
+ if IS_PARA: apply_cache_on_pipe(self.pipeline, residual_diff_threshold=0.12)
246
+ if IS_COMPILE:
 
 
247
  print("Compiling pipeline...")
248
  self.pipeline.transformer = torch.compile(self.pipeline.transformer, mode="max-autotune-no-cudagraphs")
249
  self.pipeline.vae = torch.compile(self.pipeline.vae, mode="max-autotune-no-cudagraphs")