Update src/pipeline.py

6d5fa4c verified about 1 year ago

5.25 kB

	from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
	from diffusers.image_processor import VaeImageProcessor
	from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
	from huggingface_hub.constants import HF_HUB_CACHE
	from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
	import torch
	import torch._dynamo
	import gc
	from PIL import Image as img
	from PIL.Image import Image
	from pipelines.models import TextToImageRequest
	from torch import Generator
	import time
	from diffusers import DiffusionPipeline
	from torchao.quantization import quantize_, int8_weight_only, fpx_weight_only

	import torch
	import math
	from typing import Type, Dict, Any, Tuple, Callable, Optional, Union
	import ghanta
	import numpy as np
	import torch
	import torch.nn as nn
	import torch.nn.functional as F

	from diffusers.configuration_utils import ConfigMixin, register_to_config
	from diffusers.loaders import FromOriginalModelMixin, PeftAdapterMixin
	from diffusers.models.attention import FeedForward
	from diffusers.models.attention_processor import (
	Attention,
	AttentionProcessor,
	FluxAttnProcessor2_0,
	FusedFluxAttnProcessor2_0,
	)
	from diffusers.models.modeling_utils import ModelMixin
	from diffusers.models.normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle
	from diffusers.utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
	from diffusers.utils.import_utils import is_torch_npu_available
	from diffusers.utils.torch_utils import maybe_allow_in_graph
	from diffusers.models.embeddings import CombinedTimestepGuidanceTextProjEmbeddings, CombinedTimestepTextProjEmbeddings, FluxPosEmbed
	from diffusers.models.modeling_outputs import Transformer2DModelOutput
	from diffusers import FluxPipeline, FluxTransformer2DModel
	from model import E, D
	import torchvision
	import os

	# Process-wide settings exported through environment variables.
	os.environ.update(
	    {
	        "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
	        "TOKENIZERS_PARALLELISM": "True",
	    }
	)
	torch._dynamo.config.suppress_errors = True

	# Placeholder name; in this file it is only used as an annotation for the
	# pipeline object passed between load_pipeline() and infer().
	Pipeline = None

	# cuDNN and matmul backend switches.
	torch.backends.cudnn.enabled = True
	torch.backends.cudnn.benchmark = True
	torch.backends.cuda.matmul.allow_tf32 = True

	# Base checkpoint, pinned to an exact revision.
	ckpt_id = "black-forest-labs/FLUX.1-schnell"
	ckpt_revision = "741f7c3ce8b383c54771c7003378a50191e9efe9"

	# Tiny autoencoder checkpoint, pinned to an exact revision.
	TinyVAE = "madebyollin/taef1"
	TinyVAE_REV = "2d552378e58c9c94201075708d7de4e1163b2689"

	def empty_cache():
	    """Run a garbage-collection pass, then release and reset CUDA memory state.

	    Calls ``gc.collect()`` first so that tensors which are no longer
	    referenced are freed before the CUDA caching allocator is asked to
	    release its cached blocks, then resets the peak-memory counters.
	    """
	    gc.collect()
	    torch.cuda.empty_cache()
	    # torch.cuda.reset_max_memory_allocated() is deprecated: it only emits a
	    # warning and forwards to reset_peak_memory_stats(), so one call suffices.
	    torch.cuda.reset_peak_memory_stats()


	def _load_matching_weights(module, checkpoint_path, prefix):
	    """Load the compatible subset of a checkpoint into ``module``.

	    The checkpoint is read on the CPU with ``weights_only=True``. A leading
	    ``prefix`` (e.g. ``"encoder."``) is removed from each key, and an entry
	    is kept only when the resulting name exists in ``module`` with the same
	    tensor size. The module is then frozen and cast to bfloat16.

	    Args:
	        module: The ``torch.nn.Module`` that receives the weights.
	        checkpoint_path: Path of the state-dict file given to ``torch.load``.
	        prefix: Key prefix to remove before matching against ``module``.
	    """
	    # Build the reference state dict once instead of once per checkpoint key.
	    expected = module.state_dict()
	    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

	    compatible = {}
	    for key, tensor in checkpoint.items():
	        # Remove the prefix only. str.strip(prefix) would treat the prefix as
	        # a character set and eat matching characters from BOTH ends, e.g.
	        # "encoder.conv.weight" -> "v.weight", "running_mean" -> "running_mea".
	        name = key[len(prefix):] if key.startswith(prefix) else key
	        if name in expected and tensor.size() == expected[name].size():
	            compatible[name] = tensor

	    # strict=False: entries that were filtered out keep their initial values.
	    module.load_state_dict(compatible, strict=False)
	    module.requires_grad_(False).to(dtype=torch.bfloat16)


	def load_pipeline() -> Pipeline:
	    """Build, compile and warm up the FLUX pipeline used by ``infer``.

	    Steps:
	      1. Load the transformer from a fixed snapshot inside the HF hub cache.
	      2. Load the tiny autoencoder, replace its encoder/decoder with the
	         project's ``E``/``D`` modules and fill them from ``ko.pth`` /
	         ``ok.pth`` (paths are relative to the current working directory).
	      3. Assemble a ``FluxPipeline`` from local files only and move it to CUDA.
	      4. Switch the components to channels-last and compile the transformer
	         and the VAE with ``torch.compile``.
	      5. Run two warm-up generations so compilation happens before the first
	         real request.

	    Returns:
	        The ready-to-use pipeline object.
	    """
	    path = os.path.join(HF_HUB_CACHE, "models--manbeast3b--flux.1-schnell-full1/snapshots/cb1b599b0d712b9aab2c4df3ad27b050a27ec146/transformer")
	    transformer = FluxTransformer2DModel.from_pretrained(path, torch_dtype=torch.bfloat16, use_safetensors=False)

	    vae = AutoencoderTiny.from_pretrained(
	        TinyVAE,
	        revision=TinyVAE_REV,
	        local_files_only=True,
	        torch_dtype=torch.bfloat16,
	    )
	    vae.encoder = E(16)
	    vae.decoder = D(16)
	    _load_matching_weights(vae.encoder, "ko.pth", "encoder.")
	    _load_matching_weights(vae.decoder, "ok.pth", "decoder.")

	    pipeline = FluxPipeline.from_pretrained(
	        ckpt_id,
	        revision=ckpt_revision,
	        transformer=transformer,
	        vae=vae,
	        local_files_only=True,
	        torch_dtype=torch.bfloat16,
	    )
	    pipeline.to("cuda")

	    # Optimize memory format
	    for component in [pipeline.text_encoder, pipeline.text_encoder_2, pipeline.transformer, pipeline.vae]:
	        component.to(memory_format=torch.channels_last)

	    # quantize_(pipeline.vae, int8_weight_only())
	    pipeline.transformer = torch.compile(pipeline.transformer, mode="max-autotune", fullgraph=True)
	    pipeline.vae = torch.compile(pipeline.vae, mode="max-autotune", fullgraph=True)

	    # Warm-up runs with the same settings infer() uses (4 steps, no guidance).
	    for _ in range(2):
	        pipeline(prompt="insensible, timbale, pothery, electrovital, actinogram, taxis, intracerebellar, centrodesmus", width=1024, height=1024, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256)
	    return pipeline

	# Module-level flag read by infer(); a falsy value triggers a one-off
	# empty_cache() call on the next request.
	sample = 1


	@torch.no_grad()
	def infer(request: TextToImageRequest, pipeline: Pipeline, generator: Generator) -> Image:
	    """Generate one image for ``request`` and return it as a PIL image.

	    Args:
	        request: Supplies ``prompt``, ``height`` and ``width``.
	        pipeline: The callable pipeline produced by ``load_pipeline``.
	        generator: Random generator forwarded to the pipeline call.

	    Returns:
	        The first generated image, converted with torchvision's
	        ``to_pil_image``.
	    """
	    global sample
	    # With the module default of 1 this guard never fires; it only runs when
	    # something has reset ``sample`` to a falsy value beforehand.
	    if not sample:
	        sample = 1
	        empty_cache()

	    call_kwargs = {
	        "generator": generator,
	        "guidance_scale": 0.0,
	        "num_inference_steps": 4,
	        "max_sequence_length": 256,
	        "height": request.height,
	        "width": request.width,
	        "output_type": "pt",
	    }
	    output = pipeline(request.prompt, **call_kwargs)

	    frame = output.images[0].to(torch.float32)
	    # NOTE(review): the tensor is mapped x -> 2*x - 1 in place before the PIL
	    # conversion; presumably this matches the value range produced by the
	    # custom decoder - confirm against model.D.
	    frame.mul_(2).sub_(1)
	    return torchvision.transforms.functional.to_pil_image(frame)