jokerbit
/

flux-q-4

Model card Files Files and versions

flux-q-4 / src /pipeline.py

jokerbit's picture

Upload folder using huggingface_hub

246e5d7 verified over 1 year ago

history blame contribute delete

3.27 kB

	import gc
	import os
	from typing import TypeAlias

	import torch
	from PIL.Image import Image
	from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, AutoencoderTiny
	from huggingface_hub.constants import HF_HUB_CACHE
	from pipelines.models import TextToImageRequest
	from torch import Generator
	from torchao.quantization import quantize_, int8_weight_only
	from transformers import T5EncoderModel, CLIPTextModel


	# Alias for the pipeline type used in the load_pipeline() and infer() signatures.
	Pipeline: TypeAlias = FluxPipeline
	# Module-level side effect: switch on cuDNN benchmark mode as soon as this
	# module is imported.
	torch.backends.cudnn.benchmark = True

	# Hub repo id and pinned revision that load_pipeline() passes to from_pretrained().
	CHECKPOINT = "jokerbit/flux.1-schnell-Robert-int8wo"
	REVISION = "5ef0012f11a863e5111ec56540302a023bc8587b"

	# Repo id and pinned revision of the AutoencoderTiny that load_pipeline()
	# installs as the pipeline's VAE.
	TinyVAE = "madebyollin/taef1"
	TinyVAE_REV = "2d552378e58c9c94201075708d7de4e1163b2689"


	def load_pipeline() -> Pipeline:
	    """Assemble the FLUX pipeline from cached weights, move it to CUDA and warm it up.

	    The CLIP and T5 text encoders and the pipeline itself are loaded from
	    ``CHECKPOINT`` at ``REVISION``; the VAE is the ``AutoencoderTiny`` stored
	    at ``TinyVAE``/``TinyVAE_REV``.  All of these are loaded with
	    ``local_files_only=True``, so the files must already be present in the
	    local Hugging Face cache.  The transformer is read directly from the
	    checkpoint's snapshot directory under ``HF_HUB_CACHE`` with
	    ``use_safetensors=False``.

	    Returns:
	        The pipeline on the ``"cuda"`` device, after two warm-up generations.
	    """
	    text_encoder = CLIPTextModel.from_pretrained(
	        CHECKPOINT,
	        revision=REVISION,
	        subfolder="text_encoder",
	        local_files_only=True,
	        torch_dtype=torch.bfloat16,
	    )

	    text_encoder_2 = T5EncoderModel.from_pretrained(
	        CHECKPOINT,
	        revision=REVISION,
	        subfolder="text_encoder_2",
	        local_files_only=True,
	        torch_dtype=torch.bfloat16,
	    )

	    vae = AutoencoderTiny.from_pretrained(
	        TinyVAE,
	        revision=TinyVAE_REV,
	        local_files_only=True,
	        torch_dtype=torch.bfloat16,
	    )

	    # Build the snapshot path from CHECKPOINT/REVISION instead of repeating
	    # them as a literal, so the transformer always comes from the same repo
	    # and revision as the other components.  The hub cache stores a repo
	    # "org/name" under "models--org--name/snapshots/<revision>/".
	    repo_cache_dir = "models--" + CHECKPOINT.replace("/", "--")
	    transformer_dir = os.path.join(
	        HF_HUB_CACHE, repo_cache_dir, "snapshots", REVISION, "transformer"
	    )

	    transformer = FluxTransformer2DModel.from_pretrained(
	        transformer_dir,
	        torch_dtype=torch.bfloat16,
	        use_safetensors=False,
	    )

	    pipeline = FluxPipeline.from_pretrained(
	        CHECKPOINT,
	        revision=REVISION,
	        local_files_only=True,
	        text_encoder=text_encoder,
	        text_encoder_2=text_encoder_2,
	        transformer=transformer,
	        vae=vae,
	        torch_dtype=torch.bfloat16,
	    ).to("cuda")

	    # Warm up with the same guidance, step count and prompt-length settings
	    # that infer() uses, so the warm-up runs the configuration that is
	    # actually served rather than the pipeline defaults.
	    for _ in range(2):
	        pipeline(
	            "cat",
	            guidance_scale=0.0,
	            num_inference_steps=4,
	            max_sequence_length=256,
	        )

	    return pipeline

	def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
	    """Generate a single image for ``request`` using an already loaded ``pipeline``.

	    Args:
	        request: Supplies ``prompt``, ``seed``, ``height`` and ``width``.
	        pipeline: Pipeline to call; its ``device`` is used for the random
	            generator.

	    Returns:
	        The first entry of the pipeline output's ``images``.
	    """
	    # Start each request from a clean slate: collect garbage, release cached
	    # CUDA memory and reset the peak-memory counters.
	    gc.collect()
	    torch.cuda.empty_cache()
	    torch.cuda.reset_peak_memory_stats()

	    # Seed a generator on the pipeline's device from the request's seed.
	    seeded_rng = Generator(pipeline.device)
	    seeded_rng.manual_seed(request.seed)

	    sampling_options = {
	        "generator": seeded_rng,
	        "guidance_scale": 0.0,
	        "num_inference_steps": 4,
	        "max_sequence_length": 256,
	        "height": request.height,
	        "width": request.width,
	    }
	    output = pipeline(request.prompt, **sampling_options)
	    return output.images[0]


	if __name__ == "__main__":
	    # Manual benchmark: time one pipeline load, then four runs of one request.
	    import time

	    benchmark_request = TextToImageRequest(
	        prompt='martyr, semiconformity, peregrination, quip, twineless, emotionless, tawa, depickle',
	        height=None,
	        width=None,
	        seed=666,
	    )

	    # One-off cost of building the pipeline (load_pipeline also runs its warm-up).
	    start_time = time.perf_counter()
	    loaded_pipeline = load_pipeline()
	    stop_time = time.perf_counter()
	    print(f"Pipeline is loaded in {stop_time - start_time}s")

	    # Per-request latency, measured over four identical requests.
	    for _run in range(4):
	        start_time = time.perf_counter()
	        infer(benchmark_request, loaded_pipeline)
	        stop_time = time.perf_counter()
	        print(f"Request in {stop_time - start_time}s")