# Galaxy10 / src / pipeline.py
# VictorTn's picture
# Initial commit with folder contents
# fd7197b verified
import os
from typing import Type
import torch
import torch._dynamo
import torch.nn.functional as F
from PIL import Image
from torch import Generator
from diffusers import DiffusionPipeline, FluxTransformer2DModel
from huggingface_hub.constants import HF_HUB_CACHE
from transformers import T5EncoderModel
from pipelines.models import TextToImageRequest
# Configure environment variables
# Process-wide settings applied once, as a side effect of importing this module.
# Allocator option string read by PyTorch's CUDA caching allocator
# (presumably to reduce fragmentation -- see the PyTorch CUDA memory notes).
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "expandable_segments:True"
# Explicit value for the Hugging Face tokenizers parallelism switch.
os.environ["TOKENIZERS_PARALLELISM"] = "True"
# NOTE(review): presumably makes TorchDynamo fall back instead of raising on
# compile failures -- confirm against the installed torch version.
torch._dynamo.config.suppress_errors = True
# Allow TF32 for CUDA matrix multiplications.
torch.backends.cuda.matmul.allow_tf32 = True
# Keep cuDNN switched on.
torch.backends.cudnn.enabled = True
# Placeholder bound to None; in this file it is only used as the annotation
# for the pipeline object in load_pipeline() and infer().
Pipeline = None
def load_pipeline() -> Pipeline:
    """
    Assemble the FLUX.1-schnell pipeline with a replacement transformer and
    secondary text encoder, move it to CUDA, and run one warm-up generation.

    Returns:
        Pipeline: The pipeline object, ready to be passed to ``infer``.
    """
    # Every component is loaded with the same dtype.
    dtype = torch.bfloat16

    # Secondary text encoder, pulled from the hub at a pinned revision.
    t5_encoder = T5EncoderModel.from_pretrained(
        "VictorTn/extra0izer0",
        revision="ea3cc69c8eba166304100f994e9b6ec9f5179a9d",
        subfolder="text_encoder_2",
        torch_dtype=dtype,
    )

    # The transformer is read from a snapshot directory inside the local hub
    # cache, so that snapshot has to be on disk already.
    snapshot_dir = os.path.join(
        HF_HUB_CACHE,
        "models--VictorTn--extra0izer0/snapshots/ea3cc69c8eba166304100f994e9b6ec9f5179a9d/transformer",
    )
    flux_transformer = FluxTransformer2DModel.from_pretrained(
        snapshot_dir,
        torch_dtype=dtype,
        use_safetensors=False,
    )

    # Base checkpoint, with the two custom components swapped in.
    pipe = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell",
        revision="741f7c3ce8b383c54771c7003378a50191e9efe9",
        transformer=flux_transformer,
        text_encoder_2=t5_encoder,
        torch_dtype=dtype,
    )

    pipe.to("cuda")
    # NOTE(review): DiffusionPipeline.to() may ignore ``memory_format``;
    # confirm this call has an effect on the installed diffusers version.
    pipe.to(memory_format=torch.channels_last)

    # One throwaway generation using the same sampling settings as infer().
    warmup_kwargs = {
        "prompt": "insensible, timbale, pothery, electrovital, actinogram, taxis, intracerebellar, centrodesmus",
        "width": 1024,
        "height": 1024,
        "guidance_scale": 0.0,
        "num_inference_steps": 4,
        "max_sequence_length": 256,
    }
    with torch.inference_mode():
        pipe(**warmup_kwargs)

    return pipe
@torch.no_grad()
def infer(request: TextToImageRequest, pipeline: Pipeline, generator: Generator) -> Image:
    """
    Generate a single image for one text-to-image request.

    Args:
        request (TextToImageRequest): Supplies ``prompt``, ``height`` and ``width``.
        pipeline (Pipeline): Callable pipeline, e.g. the object returned by ``load_pipeline``.
        generator (Generator): Passed through to the pipeline as its random generator.

    Returns:
        Image: The first entry of ``images`` on the pipeline's output.
    """
    # Fixed sampling settings plus the per-request output size.
    call_options = {
        "generator": generator,
        "guidance_scale": 0.0,
        "num_inference_steps": 4,
        "max_sequence_length": 256,
        "height": request.height,
        "width": request.width,
        "output_type": "pil",
    }
    result = pipeline(request.prompt, **call_options)
    return result.images[0]