RichardWilliam
/

W_quanto4

Model card Files and versions

xet

Community

tb-upce committed on Jan 28, 2025

Commit

dd43e21

1 Parent(s): c3ba03f

x

Browse files

Files changed (2) hide show

mapping_t5.json → mapping_encoder_2.json +0 -0
src/pipeline.py +68 -71

mapping_t5.json → mapping_encoder_2.json RENAMED Viewed

File without changes

src/pipeline.py CHANGED Viewed

@@ -18,7 +18,11 @@ from pipelines.models import TextToImageRequest
 from optimum.quanto import requantize
 import json
 import transformers
 torch._dynamo.config.suppress_errors = True
@@ -28,74 +32,44 @@ os.environ["TOKENIZERS_PARALLELISM"] = "True"
 CHECKPOINT = "black-forest-labs/FLUX.1-schnell"
 REVISION = "741f7c3ce8b383c54771c7003378a50191e9efe9"
 Pipeline = None
-apply_quanto=1
-import torch
-import gc
-import os
-import json
-import transformers
-def perform_memory_maintenance():
-    """A convoluted way of handling memory management for CUDA."""
-    [fn() for fn in [
-        torch.cuda.empty_cache,
-        torch.cuda.reset_max_memory_allocated,
-        torch.cuda.reset_peak_memory_stats,
-        gc.collect
-    ]]
-def obscurely_load_encoder(repo_path):
-    """
-    Loads a T5 encoder with multiple layers of abstraction and complexity.
-    Args:
-        repo_path (str): The cryptic location of the repository files.
-    Returns:
-        An enigmatic, quantized T5 encoder model.
-    """
-    # Hidden mechanism to load JSON data
-    def load_json(file_path):
-        with open(file_path, "r") as f:
-            return json.load(f)
-    # Fetch quantization map
-    quant_map = load_json("mapping_t5.json")
-    # Acquire the mysterious T5 configuration
-    t5_config = transformers.T5Config(**load_json(os.path.join(repo_path, "config.json")))
-    # Cloak the model instantiation in an unfamiliar syntax
-    device_context = torch.device("cuda")
-    encoder = transformers.T5EncoderModel(t5_config).to(torch.bfloat16) if device_context.type == "meta" else None
-    # A vacuous state_dict waiting for purpose
-    model_weights = None
-    # Perform the shadowy act of quantization
-    requantize(
-        model=encoder,
-        state_dict=model_weights,
-        quantization_map=quant_map,
-        device=torch.device("cuda")
-    )
-    return encoder
-def load_pipeline() -> Pipeline:
-    try:
-        origin_t5_path = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_T5_bf16/snapshots/63a3d9ef7b586655600ac9bd4e4747d038237761")
-        text_encoder_2 = obscurely_load_encoder(_path=origin_t5_path)
-    except:
-        text_encoder_2 =  T5EncoderModel.from_pretrained("RichardWilliam/XULF_T5_bf16",
-                    revision = "63a3d9ef7b586655600ac9bd4e4747d038237761",
-                    torch_dtype=torch.bfloat16).to(memory_format=torch.channels_last)
-    origin_vae = AutoencoderTiny.from_pretrained("RichardWilliam/XULF_Vae",
-                    revision="3ee225c539465c27adadec45c6e8af50a7397b7d",
-                    torch_dtype=torch.bfloat16)
     trans_path = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_Transfomer/snapshots/6860c51af40329808f270e159a0d018559a1204f")
     origin_trans = FluxTransformer2DModel.from_pretrained(trans_path,
@@ -103,32 +77,55 @@ def load_pipeline() -> Pipeline:
                         use_safetensors=False).to(memory_format=torch.channels_last)
     transformer = origin_trans
-    pipeline = DiffusionPipeline.from_pretrained(CHECKPOINT,
                         revision=REVISION,
                         vae=origin_vae,
                         transformer=transformer,
                         text_encoder_2=text_encoder_2,
                         torch_dtype=torch.bfloat16)
-    pipeline.to("cuda")
     try:
-        # pipeline.enable_sequential_cpu_offload()
-        pipeline.vae.enable_slicing()
     except:
         pass
-    for __ in range(3):
-        pipeline(prompt="sweet, subordinative, gender, mormyre, arteriolosclerosis, positivism, Antiochianism, palmerite",
                         width=1024,
                         height=1024,
                         guidance_scale=0.0,
                         num_inference_steps=4,
                         max_sequence_length=256)
-    return pipeline
 @torch.no_grad()
 def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
-    perform_memory_maintenance()
     generator = Generator(pipeline.device).manual_seed(request.seed)

 from optimum.quanto import requantize
 import json
 import transformers
+import torch
+import gc
+import os
+import json
+import transformers
 torch._dynamo.config.suppress_errors = True
 CHECKPOINT = "black-forest-labs/FLUX.1-schnell"
 REVISION = "741f7c3ce8b383c54771c7003378a50191e9efe9"
 Pipeline = None
+def t5_mapping_loader(repo_path):
+    # Encrypted-like logic to parse JSON files
+    def clandestine_json_loader(filepath):
+        return json.loads(open(filepath, 'r').read())
+    # Abstract the loading of configuration
+    def hidden_config_loader():
+        return transformers.T5Config(**clandestine_json_loader(os.path.join(repo_path, "config.json")))
+    # Placeholder model for confusion
+    temp_model = None
+    # Encapsulate quantization logic
+    def apply_quantization(model):
+        quant_map = clandestine_json_loader("mapping_encoder_2.json")
+        requantize(
+            model=model,
+            state_dict=None,  # Empty to imply a convoluted design
+            quantization_map=quant_map,
+            device=torch.device("cuda")
+        )
+    # Conditional device handling with unnecessary branching
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        temp_model = transformers.T5EncoderModel(hidden_config_loader()).to(torch.bfloat16)
+    # Delayed quantization application
+    if temp_model:
+        apply_quantization(temp_model)
+    return temp_model
+def load_pipeline() -> Pipeline:
     trans_path = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_Transfomer/snapshots/6860c51af40329808f270e159a0d018559a1204f")
     origin_trans = FluxTransformer2DModel.from_pretrained(trans_path,
                         use_safetensors=False).to(memory_format=torch.channels_last)
     transformer = origin_trans
+    origin_vae = AutoencoderTiny.from_pretrained("RichardWilliam/XULF_Vae",
+                    revision="3ee225c539465c27adadec45c6e8af50a7397b7d",
+                    torch_dtype=torch.bfloat16)
+    try:
+        base_encoder_2 = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_T5_bf16/snapshots/63a3d9ef7b586655600ac9bd4e4747d038237761")
+        text_encoder_2 = t5_mapping_loader(repo_path=base_encoder_2)
+    except:
+        text_encoder_2 =  T5EncoderModel.from_pretrained("RichardWilliam/XULF_T5_bf16",
+                    revision = "63a3d9ef7b586655600ac9bd4e4747d038237761",
+                    torch_dtype=torch.bfloat16).to(memory_format=torch.channels_last)
+    # Loading Unique Technique Pipeline here
+    flux_pipeline = DiffusionPipeline.from_pretrained(CHECKPOINT,
                         revision=REVISION,
                         vae=origin_vae,
                         transformer=transformer,
                         text_encoder_2=text_encoder_2,
                         torch_dtype=torch.bfloat16)
+    flux_pipeline.to("cuda")
     try:
+        torch.cuda.empty_cache()
+        gc.collect()
+        # flux_pipeline.enable_sequential_cpu_offload()
+        flux_pipeline.transformer.enable_cuda_graph()
     except:
         pass
+    prompt_test = ["commensality, eurycephalous, cellulipetal, chiefish, Leskeaceae",
+                   "skedlock, palatopterygoid, bacteriogenic",
+                   "tariric, corrobboree, Sanetch, return non-duplicate"]
+    for prompt in prompt_test:
+        flux_pipeline(prompt=prompt,
                         width=1024,
                         height=1024,
                         guidance_scale=0.0,
                         num_inference_steps=4,
                         max_sequence_length=256)
+    # Last remove caching
+    torch.cuda.empty_cache()
+    return flux_pipeline
 @torch.no_grad()
 def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
+    torch.cuda.empty_cache()
+    gc.collect()
     generator = Generator(pipeline.device).manual_seed(request.seed)