Your Name committed on
Commit
56a1f31
·
1 Parent(s): 2cc59cd
Files changed (1) hide show
  1. src/pipeline.py +53 -18
src/pipeline.py CHANGED
@@ -30,35 +30,66 @@ REVISION = "741f7c3ce8b383c54771c7003378a50191e9efe9"
30
  Pipeline = None
31
  apply_quanto=1
32
 
33
def reset_cache():
    """Run Python garbage collection, then release cached CUDA memory and
    reset the allocator's max/peak memory statistics."""
    gc.collect()
    for _clear in (
        torch.cuda.empty_cache,
        torch.cuda.reset_max_memory_allocated,
        torch.cuda.reset_peak_memory_stats,
    ):
        _clear()
 
38
 
39
def load_quanto_text_encoder_2(text_repo_path):
    """Rebuild the quantized T5 text encoder from an on-disk config and a
    local quantization map.

    Args:
        text_repo_path: Directory containing the model's ``config.json``.

    Returns:
        The requantized ``transformers.T5EncoderModel``.
    """
    with open("quantization_map.json", "r") as fh:
        qmap = json.load(fh)

    config_path = os.path.join(text_repo_path, "config.json")
    with open(config_path, "r") as fh:
        t5_config = transformers.T5Config(**json.load(fh))

    # Instantiate the architecture on the meta device so no real weight
    # storage is allocated up front.
    with torch.device("meta"):
        encoder = transformers.T5EncoderModel(t5_config).to(torch.bfloat16)

    # NOTE(review): state_dict is None here — presumably `requantize` sources
    # the weights itself; confirm against the requantize implementation.
    requantize(encoder, None, qmap, device=torch.device("cuda"))
    return encoder
49
 
 
50
  def load_pipeline() -> Pipeline:
51
 
52
  try:
53
- text_repo_path = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_T5_bf16/snapshots/63a3d9ef7b586655600ac9bd4e4747d038237761")
54
- text_encoder_2 = load_quanto_text_encoder_2(text_repo_path=text_repo_path)
55
  except:
56
- text_encoder_2 = T5EncoderModel.from_pretrained("RichardWilliam/XULF_T5_bf16",
57
  revision = "63a3d9ef7b586655600ac9bd4e4747d038237761",
58
  torch_dtype=torch.bfloat16).to(memory_format=torch.channels_last)
59
 
60
  origin_vae = AutoencoderTiny.from_pretrained("RichardWilliam/XULF_Vae",
61
- revision="3ee225c539465c27adadec45c6e8af50a7397b7d",
62
  torch_dtype=torch.bfloat16)
63
 
64
 
@@ -75,20 +106,24 @@ def load_pipeline() -> Pipeline:
75
  text_encoder_2=text_encoder_2,
76
  torch_dtype=torch.bfloat16)
77
  pipeline.to("cuda")
 
 
 
 
78
 
79
  for __ in range(3):
80
  pipeline(prompt="sweet, subordinative, gender, mormyre, arteriolosclerosis, positivism, Antiochianism, palmerite",
81
- width=1024,
82
- height=1024,
83
- guidance_scale=0.0,
84
- num_inference_steps=4,
85
  max_sequence_length=256)
86
  return pipeline
87
 
88
  @torch.no_grad()
89
  def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
90
 
91
- reset_cache()
92
 
93
  generator = Generator(pipeline.device).manual_seed(request.seed)
94
 
 
30
  Pipeline = None
31
  apply_quanto=1
32
 
33
+ import torch
34
+ import gc
35
+ import os
36
+ import json
37
+ import transformers
38
+
39
def reset_caching():
    """Release cached CUDA memory, reset the allocator's max/peak memory
    statistics, then trigger Python garbage collection."""
    cuda = torch.cuda
    cuda.empty_cache()
    cuda.reset_max_memory_allocated()
    cuda.reset_peak_memory_stats()
    gc.collect()
45
 
46
def quanto_T5(_path):
    """Build the T5 encoder on the meta device and requantize it on CUDA.

    Args:
        _path (str): Path to the text repository containing the config file.

    Returns:
        transformers.T5EncoderModel: The loaded and quantized T5 encoder model.
    """
    # Quantization layout saved in the working directory.
    with open("quantization_map.json", "r") as fh:
        qmap = json.load(fh)

    # Model configuration lives alongside the checkpoint.
    with open(os.path.join(_path, "config.json"), "r") as fh:
        t5_cfg = transformers.T5Config(**json.load(fh))

    # Meta device: build the module skeleton without allocating real storage.
    with torch.device("meta"):
        encoder = transformers.T5EncoderModel(t5_cfg).to(torch.bfloat16)

    # state_dict is deliberately None — assumed to be loaded or handled
    # within `requantize` itself (confirm against its implementation).
    requantize(
        model=encoder,
        state_dict=None,
        quantization_map=qmap,
        device=torch.device("cuda"),
    )

    return encoder
79
 
80
+
81
  def load_pipeline() -> Pipeline:
82
 
83
  try:
84
+ _path = os.path.join(HF_HUB_CACHE, "models--RichardWilliam--XULF_T5_bf16/snapshots/63a3d9ef7b586655600ac9bd4e4747d038237761")
85
+ text_encoder_2 = quanto_T5(_path=_path)
86
  except:
87
+ text_encoder_2 = T5EncoderModel.from_pretrained("RichardWilliam/XULF_T5_bf16",
88
  revision = "63a3d9ef7b586655600ac9bd4e4747d038237761",
89
  torch_dtype=torch.bfloat16).to(memory_format=torch.channels_last)
90
 
91
  origin_vae = AutoencoderTiny.from_pretrained("RichardWilliam/XULF_Vae",
92
+ revision="3ee225c539465c27adadec45c6e8af50a7397b7d",
93
  torch_dtype=torch.bfloat16)
94
 
95
 
 
106
  text_encoder_2=text_encoder_2,
107
  torch_dtype=torch.bfloat16)
108
  pipeline.to("cuda")
109
+ try:
110
+ quantize_(pipeline.origin_vae, int8_weight_only())
111
+ except:
112
+ pass
113
 
114
  for __ in range(3):
115
  pipeline(prompt="sweet, subordinative, gender, mormyre, arteriolosclerosis, positivism, Antiochianism, palmerite",
116
+ width=1024,
117
+ height=1024,
118
+ guidance_scale=0.0,
119
+ num_inference_steps=4,
120
  max_sequence_length=256)
121
  return pipeline
122
 
123
  @torch.no_grad()
124
  def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
125
 
126
+ reset_caching()
127
 
128
  generator = Generator(pipeline.device).manual_seed(request.seed)
129