MyApricity
/

OpsTorch

Model card Files and versions

xet

Community

YOURNAME committed on Feb 6, 2025

Commit

0629499

1 Parent(s): e09c84c

x

Browse files

Files changed (2) hide show

src/main.py +3 -3
src/pipeline.py +72 -73

src/main.py CHANGED Viewed

@@ -7,14 +7,14 @@ from pathlib import Path
 from PIL.JpegImagePlugin import JpegImageFile
 from pipelines.models import TextToImageRequest
-from pipeline import load_pipeline, infer
 SOCKET = abspath(Path(__file__).parent.parent / "inferences.sock")
 def main():
     print(f"Loading pipeline")
-    pipeline = load_pipeline()
     print(f"Pipeline loaded! , creating socket at '{SOCKET}'")
@@ -36,7 +36,7 @@ def main():
                     return
-                image = infer(request, pipeline)
                 data = BytesIO()
                 image.save(data, format=JpegImageFile.format)

 from PIL.JpegImagePlugin import JpegImageFile
 from pipelines.models import TextToImageRequest
+from pipeline import pipeline_loader, inference
 SOCKET = abspath(Path(__file__).parent.parent / "inferences.sock")
 def main():
     print(f"Loading pipeline")
+    pipeline = pipeline_loader()
     print(f"Pipeline loaded! , creating socket at '{SOCKET}'")
                     return
+                image = inference(request, pipeline)
                 data = BytesIO()
                 image.save(data, format=JpegImageFile.format)

src/pipeline.py CHANGED Viewed

@@ -6,22 +6,24 @@ import gc
 import json
 import transformers
 from huggingface_hub.constants import HF_HUB_CACHE
-from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
-# ApricityApricityApricityApricityApricityApricityApricityApricityApricityApricityApricity
-from torch import Generator
-from diffusers import FluxTransformer2DModel, DiffusionPipeline
 from PIL.Image import Image
 from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
 from pipelines.models import TextToImageRequest
 from optimum.quanto import requantize
 import json
-# ApricityApricityApricityApricityApricityApricityApricityApricityApricityApricityApricity
 torch._dynamo.config.suppress_errors = True
 os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
@@ -32,102 +34,99 @@ revision_root = "488528b6f815bff1bbc747cf1e0947c77c544665"
 Pipeline = None
 use_com = False
-import torch
-import math
-from typing import Dict, Any
-def remove_cache():
     torch.cuda.empty_cache()
     torch.cuda.reset_max_memory_allocated()
-    gc.collect()
     torch.cuda.reset_peak_memory_stats()
-def text_t5_loader() -> T5EncoderModel:
     print("Loading text encoder...")
-    text_encoder = T5EncoderModel.from_pretrained(
         "city96/t5-v1_1-xxl-encoder-bf16",
         revision="1b9c856aadb864af93c1dcdc226c2774fa67bc86",
         torch_dtype=torch.bfloat16,
     )
-    return text_encoder.to(memory_format=torch.channels_last)
-class StableDiffusionTransformerCompile:
-    def __init__(self, pipeline, optimize=False):
-        self.pipeline = pipeline
-        self.optimize = optimize
-        if self.optimize:
-            self.model_compiling()
-    def model_compiling(self):
-        # Staff doing here
-        self.pipeline.unet = torch.compile(self.pipeline.unet)
-    def __call__(self, *args, **kwargs):
-        return self.pipeline(*args, **kwargs)
-def load_pipeline() -> Pipeline:
-    text_t5_encoder = text_t5_loader()
-    transformer_path__ = os.path.join(HF_HUB_CACHE, "models--MyApricity--FLUX_OPT_SCHNELL_1.2/snapshots/488528b6f815bff1bbc747cf1e0947c77c544665")
-    transformer__ = FluxTransformer2DModel.from_pretrained(transformer_path__, torch_dtype=torch.bfloat16, use_safetensors=False)
     try:
-        pipeline = DiffusionPipeline.from_pretrained(ckpt_root,
-                            revision=revision_root,
-                            transformer=transformer__,
-                            torch_dtype=torch.bfloat16)
-    except:
-        pipeline = DiffusionPipeline.from_pretrained(ckpt_root,
-                            revision=revision_root,
-                            torch_dtype=torch.bfloat16)
-    pipeline.to("cuda")
-    try:
-        compiled_pipeline = StableDiffusionTransformerCompile(pipeline, optimize=False)
-        if use_com:
-            pipeline = compiled_pipeline
-        else:
-            print("Currently not compling affectively")
-        pipeline.disable_vae_compress()
-        pipeline.text_encoder_2 = text_t5_encoder
-    except:
-        print("pipeline")
-    prompt_1 = "albaspidin, pillmonger, palaeocrystalline"
-    pipeline(prompt=prompt_1,
-                    width=1024,
-                    height=1024,
-                    guidance_scale=0.0,
-                    num_inference_steps=4,
-                    max_sequence_length=256)
-    prompt_2 = "obe, kilometrage, circuition"
-    pipeline(prompt=prompt_2,
-                    width=1024,
-                    height=1024,
-                    guidance_scale=0.0,
-                    num_inference_steps=4,
-                    max_sequence_length=256)
-    return pipeline
 @torch.no_grad()
-def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
-    remove_cache()
-    # remove cache here for better result
     generator = Generator(pipeline.device).manual_seed(request.seed)
     return pipeline(

 import json
 import transformers
 from huggingface_hub.constants import HF_HUB_CACHE
+from transformers import T5EncoderModel, T5TokenizerFast
 from PIL.Image import Image
 from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
 from pipelines.models import TextToImageRequest
 from optimum.quanto import requantize
 import json
+from torch import Generator
+from diffusers import FluxTransformer2DModel, DiffusionPipeline
+# MYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMYMY
+# ApricityApricityApricityApricityApricityApricityApricityApricityApricityApricityApricityApricityApricity
+from torch._dynamo import config
+from torch._inductor import config as ind_config
+import torch
+import math
+from typing import Dict, Any
 torch._dynamo.config.suppress_errors = True
 os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
 Pipeline = None
 use_com = False
+def optimize_torch():
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+    torch.backends.cudnn.benchmark = True
+    # torch.backends.cudnn.benchmark_limit = 20
+    torch.set_float32_matmul_precision("high")
+    # config.cache_size_limit = 10000000000
+    # ind_config.shape_padding = True
+try:
+    optimize_torch()
+except:
+    print("nothing wrong")
+def delete_ca_che():
     torch.cuda.empty_cache()
     torch.cuda.reset_max_memory_allocated()
     torch.cuda.reset_peak_memory_stats()
+def pipeline_loader() -> Pipeline:
     print("Loading text encoder...")
+    en = T5EncoderModel.from_pretrained(
         "city96/t5-v1_1-xxl-encoder-bf16",
         revision="1b9c856aadb864af93c1dcdc226c2774fa67bc86",
         torch_dtype=torch.bfloat16,
     )
+    transformer_path_main = os.path.join(HF_HUB_CACHE, "models--MyApricity--FLUX_OPT_SCHNELL_1.2/snapshots/488528b6f815bff1bbc747cf1e0947c77c544665")
+    transformer_model = FluxTransformer2DModel.from_pretrained(transformer_path_main, torch_dtype=torch.bfloat16, use_safetensors=False)
+    pipe = DiffusionPipeline.from_pretrained(ckpt_root,
+                        revision=revision_root,
+                        transformer=transformer_model,
+                        torch_dtype=torch.bfloat16)
+    pipe.to("cuda")
     try:
+        # fuse QKV projections in Transformer and VAE
+        pipe.transformer.fuse_qkv_projections()
+        pipe.vae.fuse_qkv_projections()
+        # switch memory layout to Torch's preferred, channels_last
+        pipe.transformer.to(memory_format=torch.channels_last)
+        pipe.vae.to(memory_format=torch.channels_last)
+        # set torch compile flags
+        config = torch._inductor.config
+        config.disable_progress = False  # show progress bar
+        config.conv_1x1_as_mm = True  # treat 1x1 convolutions as matrix muls
+        # tag the compute-intensive modules, the Transformer and VAE decoder, for compilation
+        pipe.transformer = torch.compile(
+            pipe.transformer, mode="max-autotune", fullgraph=True
+        )
+        pipe.vae.decode = torch.compile(
+            pipe.vae.decode, mode="max-autotune", fullgraph=True
+        )
+        # trigger torch compilation
+        print("running torch compiliation..")
+        pipe(
+            "dummy prompt to trigger torch compilation",
+            output_type="pil",
+            num_inference_steps=4,  # use ~50 for [dev], smaller for [schnell]
+        ).images[0]
+        print("finished torch compilation")
+    except:
+        pipe(
+            "a beautiful girl",
+            output_type="pil",
+            num_inference_steps=4,  # use ~50 for [dev], smaller for [schnell]
+        ).images[0]
+        print("Pass error")
+    return pipe
 @torch.no_grad()
+def inference(request: TextToImageRequest, pipeline: Pipeline) -> Image:
+    delete_ca_che()
     generator = Generator(pipeline.device).manual_seed(request.seed)
     return pipeline(