Chrissy1 committed on
Commit
f65f896
·
verified ·
1 Parent(s): 3117cfa

Initial commit with folder contents

Browse files
Files changed (1) hide show
  1. src/pipeline.py +83 -32
src/pipeline.py CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  from diffusers import (
2
  DiffusionPipeline,
3
  AutoencoderKL,
@@ -13,56 +20,100 @@ from transformers import (
13
  CLIPTokenizer,
14
  CLIPTextModel
15
  )
16
- import torch
17
- import torch._dynamo
18
- import gc
19
- from PIL import Image
20
  from pipelines.models import TextToImageRequest
21
  from torch import Generator
22
- import time
23
- import math
24
- from typing import Type, Dict, Any, Tuple, Callable, Optional, Union
25
- import numpy as np
26
- import torch.nn as nn
27
- import torch.nn.functional as F
28
- from torchao.quantization import quantize_, int8_weight_only, fpx_weight_only
29
 
30
- # preconfigs
31
- import os
32
- os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
33
  os.environ["TOKENIZERS_PARALLELISM"] = "True"
34
  torch._dynamo.config.suppress_errors = True
35
  torch.backends.cuda.matmul.allow_tf32 = True
36
  torch.backends.cudnn.enabled = True
37
- # torch.backends.cudnn.benchmark = True
38
 
39
- # globals
40
  Pipeline = None
41
- ckpt_id = "black-forest-labs/FLUX.1-schnell"
42
- ckpt_revision = "741f7c3ce8b383c54771c7003378a50191e9efe9"
 
43
 
44
  def empty_cache():
 
45
  gc.collect()
46
  torch.cuda.empty_cache()
47
  torch.cuda.reset_max_memory_allocated()
48
  torch.cuda.reset_peak_memory_stats()
49
 
50
- def load_pipeline() -> Pipeline:
51
- text_encoder_2 = T5EncoderModel.from_pretrained("Chrissy1/extra0manQ0", revision = "c0db1e82d89825a4664ad873f20d261cbe46e737", subfolder="text_encoder_2",torch_dtype=torch.bfloat16).to(memory_format=torch.channels_last)
52
- path = os.path.join(HF_HUB_CACHE, "models--Chrissy1--extra0manQ0/snapshots/c0db1e82d89825a4664ad873f20d261cbe46e737/transformer")
53
- transformer = FluxTransformer2DModel.from_pretrained(path, torch_dtype=torch.bfloat16, use_safetensors=False).to(memory_format=torch.channels_last)
54
- quantize_(AutoencoderKL.from_pretrained(ckpt_id,revision=ckpt_revision, subfolder="vae", local_files_only=True, torch_dtype=torch.bfloat16,), int8_weight_only())
55
- pipeline = FluxPipeline.from_pretrained(ckpt_id, revision=ckpt_revision, transformer=transformer, text_encoder_2=text_encoder_2, torch_dtype=torch.bfloat16,)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  pipeline.to("cuda")
 
 
57
  with torch.inference_mode():
58
- pipeline(prompt="insensible, timbale, pothery, electrovital, actinogram, taxis, intracerebellar, centrodesmus", width=1024, height=1024, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256)
 
 
 
 
 
 
 
 
59
  return pipeline
60
 
61
- sample = 1
62
  @torch.no_grad()
63
- def infer(request: TextToImageRequest, pipeline: Pipeline, generator: Generator) -> Image:
64
- global sample
65
- if not sample:
66
- sample=1
67
- empty_cache()
68
- return pipeline(request.prompt,generator=generator, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256, height=request.height, width=request.width, output_type="pil").images[0]
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ from typing import Optional
7
+
8
  from diffusers import (
9
  DiffusionPipeline,
10
  AutoencoderKL,
 
20
  CLIPTokenizer,
21
  CLIPTextModel
22
  )
 
 
 
 
23
  from pipelines.models import TextToImageRequest
24
  from torch import Generator
25
+ from torchao.quantization import quantize_, int8_weight_only
 
 
 
 
 
 
26
 
27
# Pre-configurations
# Allow the CUDA caching allocator to grow segments instead of failing on
# fragmentation — helps fit large FLUX models in memory.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "expandable_segments:True"
os.environ["TOKENIZERS_PARALLELISM"] = "True"
# Fall back to eager mode instead of raising when dynamo compilation fails.
torch._dynamo.config.suppress_errors = True
# TF32 matmuls: faster on Ampere+ at slightly reduced precision.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.enabled = True

# Global variables
# NOTE(review): `Pipeline` is a leftover placeholder (the old code used it as
# a return annotation); kept so any external `from pipeline import Pipeline`
# still resolves — confirm before removing.
Pipeline = None
# Base checkpoint pinned to an exact revision for reproducible loads.
CKPT_ID = "black-forest-labs/FLUX.1-schnell"
CKPT_REVISION = "741f7c3ce8b383c54771c7003378a50191e9efe9"
38
+
39
 
40
def empty_cache():
    """Reclaim cached GPU memory and reset CUDA peak-memory statistics."""
    # Drop unreachable Python objects first so their CUDA tensors are freed.
    gc.collect()
    # Then release the allocator's cache and zero the memory counters,
    # in the same order as before.
    for cuda_reset in (
        torch.cuda.empty_cache,
        torch.cuda.reset_max_memory_allocated,
        torch.cuda.reset_peak_memory_stats,
    ):
        cuda_reset()
46
 
47
+
48
def load_pipeline() -> FluxPipeline:
    """Load, assemble, and warm up the FLUX.1-schnell text-to-image pipeline.

    Returns:
        A CUDA-resident ``FluxPipeline`` using a custom transformer and
        ``text_encoder_2`` plus an int8 weight-only quantized VAE, already
        warmed up with one inference pass.
    """
    # Custom T5 text encoder; channels_last can speed up conv/attention kernels.
    text_encoder_2 = T5EncoderModel.from_pretrained(
        "Chrissy1/extra0manQ0",
        revision="c0db1e82d89825a4664ad873f20d261cbe46e737",
        subfolder="text_encoder_2",
        torch_dtype=torch.bfloat16,
    ).to(memory_format=torch.channels_last)

    # Custom transformer loaded directly from the local HF hub cache path.
    transformer_path = os.path.join(
        HF_HUB_CACHE,
        "models--Chrissy1--extra0manQ0/snapshots/c0db1e82d89825a4664ad873f20d261cbe46e737/transformer"
    )
    transformer = FluxTransformer2DModel.from_pretrained(
        transformer_path,
        torch_dtype=torch.bfloat16,
        use_safetensors=False
    ).to(memory_format=torch.channels_last)

    # Load and quantize the autoencoder (quantize_ mutates the module in place).
    vae = AutoencoderKL.from_pretrained(
        CKPT_ID,
        revision=CKPT_REVISION,
        subfolder="vae",
        local_files_only=True,
        torch_dtype=torch.bfloat16
    )
    quantize_(vae, int8_weight_only())

    # BUG FIX: previously the quantized VAE was built and then discarded —
    # it was never passed to from_pretrained, so the pipeline re-loaded the
    # default bf16 VAE. Pass it explicitly so the quantization takes effect.
    pipeline = FluxPipeline.from_pretrained(
        CKPT_ID,
        revision=CKPT_REVISION,
        transformer=transformer,
        text_encoder_2=text_encoder_2,
        vae=vae,
        torch_dtype=torch.bfloat16
    )
    pipeline.to("cuda")

    # Warm-up run: pays one-time kernel/compilation costs up front so the
    # first real request is fast.
    with torch.inference_mode():
        pipeline(
            prompt="insensible, timbale, pothery, electrovital, actinogram, taxis, intracerebellar, centrodesmus",
            width=1024,
            height=1024,
            guidance_scale=0.0,
            num_inference_steps=4,
            max_sequence_length=256
        )

    return pipeline
101
 
102
+
103
@torch.no_grad()
def infer(request: TextToImageRequest, pipeline: FluxPipeline, generator: Generator) -> Image.Image:
    """Generate one PIL image for *request* using the warmed-up pipeline.

    Args:
        request: Text-to-image request carrying the prompt and target
            height/width.
        pipeline: Pipeline previously returned by ``load_pipeline``.
        generator: Seeded torch ``Generator`` for reproducible sampling.

    Returns:
        The first (and only) generated ``PIL.Image.Image``.
    """
    # Reclaim fragmented GPU memory before every generation.
    empty_cache()  # Clear cache before inference

    # guidance_scale=0.0 / 4 steps match the schnell distilled model's
    # recommended settings (same values as the warm-up run).
    result = pipeline(
        prompt=request.prompt,
        generator=generator,
        guidance_scale=0.0,
        num_inference_steps=4,
        max_sequence_length=256,
        height=request.height,
        width=request.width,
        output_type="pil"
    )

    return result.images[0]