# Flux1-Depth-Dev / scripts / MergeSafetensors3.py
# Author: srcphag — "New Diffusers structure" (commit 1bf81fc)
"""
Convert Diffusers-format FLUX model to ComfyUI-compatible checkpoint.
This handles the proper folder structure and key naming.
"""
from safetensors.torch import save_file, load_file
import os
import json
from pathlib import Path
def _load_component_state(component_path, fp16):
    """Load and merge every ``*.safetensors`` shard found in *component_path*.

    Shards are processed in sorted name order so multi-file (sharded)
    checkpoints such as ``model-00001-of-00002.safetensors`` are merged
    deterministically and completely.  Returns an empty dict when the
    folder contains no safetensors files (or does not exist).

    Args:
        component_path: ``Path`` to a Diffusers component folder
            (e.g. ``transformer/``, ``vae/``).
        fp16: If True, cast floating-point tensors to float16.
    """
    state = {}
    if not component_path.exists():
        return state
    for shard in sorted(component_path.glob("*.safetensors")):
        print(f"Found: {shard.name}")
        for key, value in load_file(str(shard)).items():
            if fp16 and value.dtype.is_floating_point:
                value = value.half()
            state[key] = value
    return state


def convert_diffusers_to_comfyui(
    diffusers_folder,
    output_path,
    fp16=False,
):
    """
    Convert a Diffusers-format FLUX model folder to a single ComfyUI checkpoint.

    Merges the transformer, VAE, and both text encoders (CLIP + T5) into one
    ``.safetensors`` file.  Keys are kept with their original Diffusers names.
    NOTE(review): because no per-component prefix is added, identically named
    keys across components would overwrite each other — presumably the
    Diffusers key namespaces are disjoint; verify for your model.

    Args:
        diffusers_folder: Path to folder containing ``model_index.json``.
        output_path: Output path for the merged ``.safetensors`` file.
        fp16: If True, convert floating-point tensors to float16 to save space.

    Returns:
        ``output_path``, unchanged, for caller convenience.

    Raises:
        ValueError: If the folder is not a Diffusers model, or the mandatory
            transformer component has no safetensors file.
    """
    diffusers_folder = Path(diffusers_folder)

    # Verify it's a Diffusers model: model_index.json is the format's marker file.
    model_index = diffusers_folder / "model_index.json"
    if not model_index.exists():
        raise ValueError(f"Not a Diffusers model folder. Missing: {model_index}")
    with open(model_index) as f:
        config = json.load(f)

    print("=" * 80)
    print("DIFFUSERS TO COMFYUI CONVERTER")
    print("=" * 80)
    print(f"\nModel: {config.get('_name_or_path', 'Unknown')}")
    print(f"Format: {config.get('_class_name', 'Unknown')}")

    merged_state = {}

    # ------------------------------------------------------------------
    # 1. Transformer (main FLUX model) — mandatory.
    # ------------------------------------------------------------------
    print("\n" + "=" * 80)
    print("Loading Transformer...")
    print("=" * 80)
    transformer_path = diffusers_folder / "transformer"
    transformer_state = _load_component_state(transformer_path, fp16)
    if not transformer_state:
        raise ValueError(f"No safetensors file found in {transformer_path}")
    print(f"Loaded {len(transformer_state)} transformer parameters")
    merged_state.update(transformer_state)

    # ------------------------------------------------------------------
    # 2. VAE — optional; a warning is printed when absent.
    # ------------------------------------------------------------------
    print("\n" + "=" * 80)
    print("Loading VAE...")
    print("=" * 80)
    vae_state = _load_component_state(diffusers_folder / "vae", fp16)
    if not vae_state:
        print("⚠️ No VAE file found, skipping...")
    else:
        print(f"Loaded {len(vae_state)} VAE parameters")
        merged_state.update(vae_state)

    # ------------------------------------------------------------------
    # 3. Text encoders: CLIP (text_encoder) and T5 (text_encoder_2).
    # ------------------------------------------------------------------
    print("\n" + "=" * 80)
    print("Loading Text Encoders...")
    print("=" * 80)

    clip_state = _load_component_state(diffusers_folder / "text_encoder", fp16)
    if clip_state:
        print(f"Loaded {len(clip_state)} CLIP parameters")
        merged_state.update(clip_state)
    else:
        print("⚠️ No CLIP file found")

    # T5 is usually the largest component by far.
    t5_path = diffusers_folder / "text_encoder_2"
    if t5_path.exists():
        print("⚠️ Loading T5 (this may take a while, it's large)...")
    t5_state = _load_component_state(t5_path, fp16)
    if t5_state:
        print(f"Loaded {len(t5_state)} T5 parameters")
        merged_state.update(t5_state)
    else:
        print("⚠️ No T5 file found")

    # ------------------------------------------------------------------
    # Save merged checkpoint.
    # ------------------------------------------------------------------
    print("\n" + "=" * 80)
    print("Saving merged checkpoint...")
    print("=" * 80)
    print(f"Total parameters: {len(merged_state):,}")
    print(f"Output: {output_path}")
    save_file(merged_state, output_path)

    size_gb = os.path.getsize(output_path) / (1024**3)
    print(f"\n✅ Conversion complete!")
    print(f"File size: {size_gb:.2f} GB")

    # Show a sample of the resulting key structure for quick sanity checking.
    print("\n" + "=" * 80)
    print("Key Structure in Merged File")
    print("=" * 80)
    print("\nFirst 10 keys:")
    for key in list(merged_state.keys())[:10]:
        print(f"  {key}")

    return output_path
def convert_with_working_template(
    diffusers_folder,
    working_checkpoint,
    output_path,
    replace_transformer_only=True,
):
    """
    Use a working checkpoint as a template, replacing components from a
    Diffusers model.  This ensures key naming matches what ComfyUI expects:
    only template keys that also exist in the new weights are overwritten.

    Args:
        diffusers_folder: Path to Diffusers model folder.
        working_checkpoint: Path to a working ComfyUI checkpoint (.safetensors).
        output_path: Output path for the merged checkpoint.
        replace_transformer_only: If True, only replace transformer weights
            and keep VAE/text encoders from the template.

    Raises:
        ValueError: If the Diffusers folder has no transformer safetensors file.
    """
    print("=" * 80)
    print("TEMPLATE-BASED CONVERSION")
    print("=" * 80)

    # Load the working checkpoint as the template whose key names we keep.
    print("\nLoading template checkpoint...")
    template_state = load_file(working_checkpoint)
    print(f"Template has {len(template_state)} parameters")

    # Heuristically partition template keys by component name patterns.
    template_keys = set(template_state.keys())
    transformer_keys = {
        k for k in template_keys
        if 'transformer' in k or 'double_blocks' in k or 'single_blocks' in k
    }
    vae_keys = {k for k in template_keys if 'vae' in k.lower() or 'first_stage' in k}
    text_encoder_keys = {k for k in template_keys if 'text_encoder' in k or 'clip' in k.lower()}
    print(f"\nTemplate structure:")
    print(f"  Transformer keys: {len(transformer_keys)}")
    print(f"  VAE keys: {len(vae_keys)}")
    print(f"  Text encoder keys: {len(text_encoder_keys)}")

    # Load the replacement transformer from the Diffusers folder.  All shards
    # are merged (sorted order) so sharded checkpoints are handled completely;
    # an empty folder raises a clear ValueError instead of a bare StopIteration.
    diffusers_folder = Path(diffusers_folder)
    transformer_path = diffusers_folder / "transformer"
    transformer_files = sorted(transformer_path.glob("*.safetensors"))
    if not transformer_files:
        raise ValueError(f"No safetensors file found in {transformer_path}")
    new_transformer = {}
    for shard in transformer_files:
        print(f"\nLoading new transformer from: {shard.name}")
        new_transformer.update(load_file(str(shard)))

    # Overwrite template transformer weights that exist in the new state dict.
    print("\nReplacing transformer weights...")
    merged_state = dict(template_state)  # shallow copy of the template
    replaced = 0
    for key in transformer_keys:
        if key in new_transformer:
            merged_state[key] = new_transformer[key]
            replaced += 1
    print(f"Replaced {replaced} transformer parameters")

    if not replace_transformer_only:
        print("\n⚠️ Also replacing VAE and text encoders...")
        # Merge all VAE shards, then overwrite matching template keys.
        vae_shards = sorted((diffusers_folder / "vae").glob("*.safetensors"))
        if vae_shards:
            vae_state = {}
            for shard in vae_shards:
                vae_state.update(load_file(str(shard)))
            for key in vae_keys:
                if key in vae_state:
                    merged_state[key] = vae_state[key]
        # TODO(review): text encoders (text_encoder / text_encoder_2) are still
        # not replaced here — template encoder weights are kept as-is.

    # Save the merged checkpoint and report the resulting file size.
    print(f"\nSaving to {output_path}...")
    save_file(merged_state, output_path)
    size_gb = os.path.getsize(output_path) / (1024**3)
    print(f"✅ Done! File size: {size_gb:.2f} GB")
# Script entry point — example invocations.
if __name__ == "__main__":
    # Method 1: Direct conversion
    # convert_diffusers_to_comfyui(
    #     diffusers_folder="../",
    #     output_path="flux1-depth-dev_ComfyMerged.safetensors",
    #     fp16=True  # Set False to keep original precision
    # )

    # Method 2: Use working checkpoint as template (RECOMMENDED)
    convert_with_working_template(
        diffusers_folder="../",
        working_checkpoint="../quantized/svdq-fp4_r32-flux.1-depth-dev.safetensors",
        output_path="svdq-fp4_r32-flux.1-depth-dev_ComfyMerged.safetensors",
        replace_transformer_only=True,
    )