John6666 committed on
Commit b748322 · verified · 1 Parent(s): 8113764

Upload 2 files

Files changed (2)
  1. handler.py +79 -0
  2. requirements.txt +14 -0
handler.py ADDED
@@ -0,0 +1,79 @@
+ import os
+ from typing import Any, Dict
+
+ from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, TorchAoConfig
+ from diffusers.utils.remote_utils import remote_decode
+ from PIL import Image
+ import torch
+
+ IS_COMPILE = True
+
+ if IS_COMPILE:
+     import torch._dynamo
+     torch._dynamo.config.suppress_errors = True
+
+ #from huggingface_inference_toolkit.logging import logger
+
+ def compile_pipeline(pipe) -> Any:
+     pipe.transformer.fuse_qkv_projections()
+     pipe.transformer.to(memory_format=torch.channels_last)
+     #pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
+     pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True, dynamic=False, backend="inductor")
+     return pipe
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         repo_id = "camenduru/FLUX.1-dev-diffusers"
+         #repo_id = "NoMoreCopyright/FLUX.1-dev-test"
+         dtype = torch.bfloat16
+         quantization_config = TorchAoConfig("int8wo")
+         vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
+         #transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
+         self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
+         if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
+         self.pipeline.to("cuda")
+
+     def __call__(self, data: Dict[str, Any]) -> Image.Image:
+         #logger.info(f"Received incoming request with {data=}")
+
+         if "inputs" in data and isinstance(data["inputs"], str):
+             prompt = data.pop("inputs")
+         elif "prompt" in data and isinstance(data["prompt"], str):
+             prompt = data.pop("prompt")
+         else:
+             raise ValueError(
+                 "Provided input body must contain either the key `inputs` or `prompt` with the"
+                 " prompt to use for the image generation, and it needs to be a non-empty string."
+             )
+
+         parameters = data.pop("parameters", {})
+
+         num_inference_steps = parameters.get("num_inference_steps", 28)
+         width = parameters.get("width", 1024)
+         height = parameters.get("height", 1024)
+         guidance_scale = parameters.get("guidance_scale", 3.5)
+
+         # seed generator (seed cannot be provided as is but via a generator)
+         seed = parameters.get("seed", 0)
+         generator = torch.manual_seed(seed)
+
+         latent = self.pipeline(  # type: ignore
+             prompt,
+             height=height,
+             width=width,
+             guidance_scale=guidance_scale,
+             num_inference_steps=num_inference_steps,
+             generator=generator,
+             output_type="latent",
+         ).images
+
+         image = remote_decode(
+             endpoint="https://whhx50ex1aryqvw6.us-east-1.aws.endpoints.huggingface.cloud/",
+             tensor=latent,
+             height=height,
+             width=width,
+             scaling_factor=0.3611,
+             shift_factor=0.1159,
+         )
+
+         return image
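
For reference, a minimal local smoke test of the handler above (not part of the commit): a sketch that assumes a CUDA GPU, the packages from requirements.txt, and that handler.py is on the import path; the prompt and seed values are arbitrary placeholders.

from handler import EndpointHandler

# Hypothetical smoke test: the payload shape mirrors what EndpointHandler.__call__ expects.
h = EndpointHandler()
image = h({
    "inputs": "a watercolor fox in a snowy forest",
    "parameters": {"width": 1024, "height": 1024, "num_inference_steps": 28, "seed": 42},
})
image.save("output.png")  # the handler returns a PIL.Image.Image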
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ huggingface_hub
+ torch
+ torchvision
+ torchao
+ git+https://github.com/huggingface/diffusers
+ peft
+ accelerate
+ transformers
+ numpy
+ scipy
+ Pillow
+ sentencepiece
+ protobuf
+ pytorch-lightning