"""Gradio toolkit for merging, extracting, averaging and resizing LoRA adapters.

Each tab of the UI maps to one ``task_*`` function. Every task downloads its
inputs into ``TempDir``, works on CPU tensors, and uploads the result to a
Hugging Face repository.
"""

import gc
import json
import os
import re
import shutil
import struct
from pathlib import Path
from typing import Any, Dict, Optional

import gradio as gr
import numpy as np
import requests
import torch
from huggingface_hub import HfApi, hf_hub_download, list_repo_files, login
from safetensors.torch import load_file, save_file
from tqdm import tqdm


# --- Memory Efficient Safetensors ---

# NOTE(review): the reviewed copy of this file was truncated between the start
# of MemoryEfficientSafeOpen._read_header and the tail of verify_safetensors.
# Everything marked "reconstructed" below was rebuilt from the visible call
# sites and the published safetensors layout (8-byte little-endian header
# length, then a JSON header, then raw tensor bytes). Confirm against the
# upstream file before relying on it.
_SAFETENSORS_DTYPES = {
    "F64": torch.float64,
    "F32": torch.float32,
    "F16": torch.float16,
    "BF16": torch.bfloat16,
    "I64": torch.int64,
    "I32": torch.int32,
    "I16": torch.int16,
    "I8": torch.int8,
    "U8": torch.uint8,
    "BOOL": torch.bool,
}
# 8-bit float dtypes only exist in newer torch builds; register them if present.
for _tag, _attr in (("F8_E4M3", "float8_e4m3fn"), ("F8_E5M2", "float8_e5m2")):
    if hasattr(torch, _attr):
        _SAFETENSORS_DTYPES[_tag] = getattr(torch, _attr)


class MemoryEfficientSafeOpen:
    """
    Reads safetensors metadata and tensors without mmap, keeping RAM usage low.
    Essential for running on limited hardware.

    Usable as a context manager; the underlying file is closed on exit.
    """

    def __init__(self, filename):
        self.filename = filename
        self.file = open(filename, "rb")
        try:
            self.header, self.header_size = self._read_header()
        except Exception:
            # Do not leak the handle when the header is unreadable.
            self.file.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.file.close()

    def keys(self) -> list[str]:
        """Return every tensor name in the file (the metadata entry excluded)."""
        return [k for k in self.header.keys() if k != "__metadata__"]

    def metadata(self) -> Dict[str, str]:
        """Return the free-form ``__metadata__`` mapping, or an empty dict."""
        return self.header.get("__metadata__", {})

    def get_tensor(self, key):
        """Read one tensor from disk.

        Raises:
            KeyError: if ``key`` is not present in the header.
        """
        if key not in self.header:
            raise KeyError(f"Tensor '{key}' not found in the file")
        metadata = self.header[key]
        offset_start, offset_end = metadata["data_offsets"]
        # Offsets are relative to the end of the header; the extra 8 bytes are
        # the header-length prefix.
        self.file.seek(self.header_size + 8 + offset_start)
        tensor_bytes = self.file.read(offset_end - offset_start)
        return self._deserialize_tensor(tensor_bytes, metadata)

    def _read_header(self):
        """Return ``(header_dict, header_size)``. (reconstructed)"""
        header_size = int.from_bytes(self.file.read(8), "little")
        header_json = self.file.read(header_size).decode("utf-8")
        return json.loads(header_json), header_size

    def _deserialize_tensor(self, tensor_bytes, metadata):
        """Turn raw bytes into a tensor of the recorded dtype/shape. (reconstructed)"""
        dtype = _SAFETENSORS_DTYPES[metadata["dtype"]]
        shape = metadata["shape"]
        if not tensor_bytes:
            return torch.empty(shape, dtype=dtype)
        # bytearray gives torch a writable buffer, so the tensor owns its data.
        raw = torch.frombuffer(bytearray(tensor_bytes), dtype=torch.uint8)
        return raw.view(dtype).reshape(shape)


# (reconstructed) Shared Hub client and scratch directory used by every task.
api = HfApi()
TempDir = Path("./temp")  # NOTE(review): original location not visible - confirm
os.makedirs(TempDir, exist_ok=True)


def cleanup_temp():
    """Empty the scratch directory before a task runs. (reconstructed)"""
    if TempDir.exists():
        shutil.rmtree(TempDir, ignore_errors=True)
    TempDir.mkdir(parents=True, exist_ok=True)
    gc.collect()


def verify_safetensors(path):
    """Cheap sanity check: does ``path`` start with a plausible header length?

    Only the tail of this function was visible in the reviewed copy; the read
    of the 8-byte prefix is reconstructed.
    """
    try:
        with open(path, "rb") as f:
            prefix = f.read(8)
        if len(prefix) != 8:
            return False
        header_size = int.from_bytes(prefix, "little")
        if header_size > os.path.getsize(path) or header_size <= 0:
            return False
        return True
    except OSError:
        return False


def _resolve_remote_filename(repo_id, token, preferred=None):
    """Pick which file to fetch from ``repo_id``.

    ``preferred`` wins when the repo really contains it (backward compatible
    with callers that pass a remote name). Otherwise the last ``.safetensors``
    file whose name contains "adapter" is used, else the first ``.safetensors``
    file, else ``adapter_model.bin``.
    """
    try:
        files = list_repo_files(repo_id=repo_id, token=token)
    except Exception:
        # Listing can fail on gated/private repos; fall back to a direct guess.
        return preferred or "adapter_model.safetensors"
    if preferred and preferred in files:
        return preferred
    candidates = [f for f in files if f.endswith(".safetensors")]
    if not candidates:
        return "adapter_model.bin"
    chosen = candidates[0]
    for f in candidates:
        if "adapter" in f:
            chosen = f
    return chosen


def download_file(input_path, token, filename=None):
    """Downloads a file from URL or HF Repo.

    Args:
        input_path: an ``http(s)`` URL or a Hub repo id.
        token: Hub access token (used for repo downloads only).
        filename: name of the local copy inside ``TempDir``. If the repo also
            contains a file with this exact name, that file is downloaded.

    Returns:
        Path of the downloaded file inside ``TempDir``.

    Raises:
        ValueError: on download failure or if the result is not safetensors.
    """
    local_path = TempDir / (filename if filename else "model.safetensors")
    local_path.parent.mkdir(parents=True, exist_ok=True)

    if input_path.startswith("http"):
        print(f"Downloading from URL: {input_path}")
        try:
            # "with" closes the streamed connection even on failure.
            with requests.get(input_path, stream=True, timeout=30) as response:
                response.raise_for_status()
                with open(local_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
        except Exception as e:
            raise ValueError(f"Failed to download URL. Check your link. Error: {e}") from e
    else:
        print(f"Downloading from Repo: {input_path}")
        # The local name and the remote name are different things: callers pass
        # names such as "org.safetensors" that do not exist in the repo.
        remote_name = _resolve_remote_filename(input_path, token, preferred=filename)
        try:
            hf_hub_download(
                repo_id=input_path,
                filename=remote_name,
                token=token,
                local_dir=TempDir,
                local_dir_use_symlinks=False,
            )
            downloaded_path = TempDir / remote_name
            if downloaded_path != local_path:
                if local_path.exists():
                    os.remove(local_path)
                shutil.move(downloaded_path, local_path)
        except Exception as e:
            raise ValueError(f"Failed to download from HF Repo. Check ID/Token. Error: {e}") from e

    if not verify_safetensors(local_path):
        raise ValueError(
            f"Downloaded file is NOT a valid safetensors file. Check your URL/Repo. "
            f"(File size: {os.path.getsize(local_path)} bytes)"
        )
    return local_path


_STEM_SUFFIXES = (
    ".weight", ".bias",
    ".lora_down", ".lora_up",
    ".lora_A", ".lora_B",
    ".alpha",
)
_STEM_PREFIXES = (
    "model.diffusion_model.", "diffusion_model.", "model.", "transformer.",
    "text_encoder.", "lora_unet_", "lora_te_", "base_model.model.",
)


def get_key_stem(key):
    """Reduce a tensor key to the module path shared by base and LoRA weights.

    Removes every occurrence of the weight/bias/LoRA/alpha markers, then strips
    known framework prefixes repeatedly until none is left at the start.
    """
    for marker in _STEM_SUFFIXES:
        key = key.replace(marker, "")
    changed = True
    while changed:
        changed = False
        for p in _STEM_PREFIXES:
            if key.startswith(p):
                key = key[len(p):]
                changed = True
    return key


# =================================================================================
# TAB 1: UNIVERSAL MERGE (In-Place Memory Optimization)
# =================================================================================

def load_lora_to_memory(lora_path, precision_dtype=torch.bfloat16):
    """Load a LoRA file and group its tensors by module stem.

    Returns:
        ``{stem: {"down": Tensor, "up": Tensor, "rank": int, "alpha": float}}``.
        "down"/"up"/"rank" are present only when the matching tensors exist.
        "alpha" falls back to the rank, or 1.0 when the rank is unknown.
    """
    print(f"Loading LoRA from {lora_path} in {precision_dtype}...")
    state_dict = load_file(lora_path, device="cpu")
    pairs = {}
    alphas = {}
    for k, v in state_dict.items():
        stem = get_key_stem(k)
        if "alpha" in k:
            alphas[stem] = v.item() if isinstance(v, torch.Tensor) else v
            continue
        entry = pairs.setdefault(stem, {})
        # Cast immediately to save RAM
        tensor_low = v.to(dtype=precision_dtype)
        if "lora_down" in k or "lora_A" in k:
            entry["down"] = tensor_low
            entry["rank"] = v.shape[0]
        elif "lora_up" in k or "lora_B" in k:
            entry["up"] = tensor_low

    for stem, entry in pairs.items():
        if stem in alphas:
            entry["alpha"] = alphas[stem]
        elif "rank" in entry:
            entry["alpha"] = float(entry["rank"])
        else:
            entry["alpha"] = 1.0
    return pairs


_QKV_PARTS = ("to_q", "to_k", "to_v")


def _find_lora_match(base_stem, lora_pairs):
    """Return the LoRA entry for ``base_stem``: exact match, else fused-QKV match."""
    if base_stem in lora_pairs:
        return lora_pairs[base_stem]
    # Heuristic Match (Z-Image QKV split): only the first projection name found
    # in the stem is tried.
    for part in _QKV_PARTS:
        if part in base_stem:
            return lora_pairs.get(base_stem.replace(part, "qkv"))
    return None


def _compute_delta(up, down, target):
    """Return ``up @ down`` shaped like a weight, or None if the shapes cannot multiply."""
    # Linear-shaped LoRA applied to a conv weight: treat it as a 1x1 conv.
    if target.dim() == 4 and down.dim() == 2:
        down = down.unsqueeze(-1).unsqueeze(-1)
        up = up.unsqueeze(-1).unsqueeze(-1)
    try:
        if up.dim() == 4:
            # flatten(1) keeps the rank axis even when rank == 1 (squeeze would
            # drop it) and folds any kernel dims into the input axis.
            flat = up.flatten(1) @ down.flatten(1)
            return flat.reshape(up.shape[0], *down.shape[1:])
        return up @ down
    except RuntimeError:
        pass
    # Some exports store "up" transposed; try that before giving up.
    if up.dim() == 2 and down.dim() == 2:
        try:
            return up.T @ down
        except RuntimeError:
            return None
    return None


def _fit_delta(delta, target, key):
    """Slice/reshape ``delta`` to ``target``'s shape, or return None to skip."""
    if delta.shape == target.shape:
        return delta
    if target.dim() > 0 and delta.dim() > 0 and delta.shape[0] == target.shape[0] * 3:
        # Fused QKV delta: take the third that belongs to this projection.
        chunk_size = target.shape[0]
        for idx, part in enumerate(_QKV_PARTS):
            if part in key:
                piece = delta[idx * chunk_size:(idx + 1) * chunk_size, ...]
                if piece.shape == target.shape:
                    return piece
                if piece.numel() == target.numel():
                    return piece.reshape(target.shape)
                break
        return None
    if delta.numel() == target.numel():
        return delta.reshape(target.shape)
    print(f"Skipping {key}: Mismatch. Base: {target.shape}, Delta: {delta.shape}")
    return None


def merge_shard_logic(base_path, lora_pairs, scale, output_path, precision_dtype=torch.bfloat16):
    """Add scaled LoRA deltas into one base shard and save it to ``output_path``.

    ``precision_dtype`` is accepted for call-site symmetry; the LoRA tensors are
    already in that dtype and each delta is cast to the base tensor's dtype.

    Returns:
        True once the merged shard has been written.
    """
    print(f"Loading base shard: {base_path}")
    base_state = load_file(base_path, device="cpu")
    keys_to_process = list(base_state.keys())
    periodic_gc = len(keys_to_process) > 100

    for idx, k in enumerate(keys_to_process):
        v = base_state[k]  # modified in place below
        match = _find_lora_match(get_key_stem(k), lora_pairs)

        if match and "down" in match and "up" in match:
            scaling = scale * (match["alpha"] / match["rank"])
            delta = _compute_delta(match["up"], match["down"], v)
            if delta is None:
                print(f"Skipping {k}: LoRA factors cannot be multiplied.")
            else:
                delta = _fit_delta(delta * scaling, v, k)
                if delta is not None:
                    # We do NOT cast base to float32; the delta follows the
                    # base tensor's dtype.
                    if v.dtype != delta.dtype:
                        delta = delta.to(v.dtype)
                    v.add_(delta)
                del delta

        # Periodic GC (enumerate avoids an O(n) list.index per key).
        if periodic_gc and idx % 50 == 0:
            gc.collect()

    save_file(base_state, output_path)
    return True


def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, output_repo,
               structure_repo, private, precision="bf16", progress=gr.Progress()):
    """Merge a LoRA into every safetensors shard of ``base_repo`` and upload it.

    ``precision`` is "bf16", "fp16", or anything else for float32.
    Returns a status string for the UI.
    """
    cleanup_temp()
    login(hf_token)

    # Determine Dtype
    if precision == "bf16":
        dtype = torch.bfloat16
    elif precision == "fp16":
        dtype = torch.float16
    else:
        dtype = torch.float32
    print(f"Selected Precision: {dtype}")

    try:
        api.create_repo(repo_id=output_repo, private=private, exist_ok=True, token=hf_token)
    except Exception as e:
        return f"Error creating repo: {e}"

    if structure_repo:
        print("Cloning structure...")
        try:
            files = list_repo_files(repo_id=structure_repo, token=hf_token)
            for f in files:
                if f.endswith((".safetensors", ".bin")):
                    continue
                try:
                    path = hf_hub_download(repo_id=structure_repo, filename=f, token=hf_token)
                    api.upload_file(path_or_fileobj=path, path_in_repo=f,
                                    repo_id=output_repo, token=hf_token)
                except Exception as e:
                    # Best effort: keep going, but say which file was skipped.
                    print(f"Structure clone warning ({f}): {e}")
        except Exception as e:
            print(f"Structure clone warning: {e}")

    try:
        progress(0.1, desc="Downloading LoRA...")
        lora_path = download_file(lora_input, hf_token)
        # Load LoRA in target precision to save RAM immediately
        lora_pairs = load_lora_to_memory(lora_path, precision_dtype=dtype)
    except Exception as e:
        return f"CRITICAL ERROR: {str(e)}"

    try:
        files = list_repo_files(repo_id=base_repo, token=hf_token)
    except Exception as e:
        return f"CRITICAL ERROR: {str(e)}"
    shards = [f for f in files if f.endswith(".safetensors")]
    if base_subfolder:
        shards = [f for f in shards if f.startswith(base_subfolder)]
    if not shards:
        return "Error: No safetensors found in base."

    merged_path = TempDir / "merged.safetensors"
    for i, shard in enumerate(shards):
        progress(0.2 + (0.8 * i / len(shards)), desc=f"Merging {shard}")
        local_shard = hf_hub_download(repo_id=base_repo, filename=shard,
                                      token=hf_token, local_dir=TempDir)
        merge_shard_logic(local_shard, lora_pairs, scale, merged_path, precision_dtype=dtype)
        api.upload_file(path_or_fileobj=merged_path, path_in_repo=shard,
                        repo_id=output_repo, token=hf_token)
        # Free disk before fetching the next shard.
        os.remove(local_shard)
        if merged_path.exists():
            os.remove(merged_path)
        gc.collect()

    return f"Done! Model at https://huggingface.co/{output_repo}"


# =================================================================================
# TAB 2: EXTRACT LORA
# =================================================================================

def extract_lora_layer_by_layer(model_org, model_tuned, rank, clamp):
    """Build a LoRA from the per-tensor difference of two checkpoints via SVD.

    Only 2-D and 4-D tensors present in both files with equal shapes are
    processed; tensors whose largest absolute difference is below 1e-4 are
    skipped. Factor values are clamped to the ``clamp`` quantile.

    Returns:
        Path (str) of the saved ``extracted_lora.safetensors``.
    """
    lora_sd = {}
    print("Calculating diffs and running SVD (Layer-wise)...")

    # Context managers close both files even if a tensor read fails.
    with MemoryEfficientSafeOpen(model_org) as org, MemoryEfficientSafeOpen(model_tuned) as tuned:
        tuned_keys = set(tuned.keys())  # built once; O(1) membership per key
        for key in tqdm(org.keys()):
            if key not in tuned_keys:
                continue

            mat_org = org.get_tensor(key).float()
            mat_tuned = tuned.get_tensor(key).float()
            # Biases/norms (1-D) and mismatched layers cannot be factorised.
            if mat_org.dim() not in (2, 4) or mat_org.shape != mat_tuned.shape:
                continue
            diff = mat_tuned - mat_org

            if torch.max(torch.abs(diff)) < 1e-4:
                continue

            out_dim, in_dim = diff.shape[:2]
            r = min(rank, in_dim, out_dim)
            is_conv = diff.dim() == 4
            if is_conv:
                diff = diff.flatten(start_dim=1)

            try:
                U, S, Vh = torch.linalg.svd(diff, full_matrices=False)
                U = U[:, :r] @ torch.diag(S[:r])
                Vh = Vh[:r, :]

                dist = torch.cat([U.flatten(), Vh.flatten()])
                hi_val = torch.quantile(dist, clamp)
                U = U.clamp(-hi_val, hi_val)
                Vh = Vh.clamp(-hi_val, hi_val)

                if is_conv:
                    U = U.reshape(out_dim, r, 1, 1)
                    Vh = Vh.reshape(r, in_dim, mat_org.shape[2], mat_org.shape[3])
                else:
                    U = U.reshape(out_dim, r)
                    Vh = Vh.reshape(r, in_dim)

                stem = key.replace(".weight", "")
                lora_sd[f"{stem}.lora_up.weight"] = U.contiguous()
                lora_sd[f"{stem}.lora_down.weight"] = Vh.contiguous()
                lora_sd[f"{stem}.alpha"] = torch.tensor(r).float()
            except RuntimeError as e:
                print(f"SVD failed for {key}: {e}")

    out_path = TempDir / "extracted_lora.safetensors"
    save_file(lora_sd, out_path)
    return str(out_path)


def task_extract(hf_token, org_repo, tuned_repo, rank, output_repo):
    """Download two checkpoints, extract their difference as a LoRA, upload it."""
    cleanup_temp()
    login(hf_token)

    print("Downloading models...")
    p1 = download_file(org_repo, hf_token, "org.safetensors")
    p2 = download_file(tuned_repo, hf_token, "tuned.safetensors")

    out = extract_lora_layer_by_layer(p1, p2, int(rank), 0.99)
    api.create_repo(repo_id=output_repo, exist_ok=True, token=hf_token)
    api.upload_file(path_or_fileobj=out, path_in_repo="extracted_lora.safetensors",
                    repo_id=output_repo, token=hf_token)
    return "Extraction Done."


# =================================================================================
# TAB 3: MERGE ADAPTERS (EMA)
# =================================================================================

def task_merge_adapters(hf_token, lora_urls, beta, output_repo):
    """Fold several adapters into the first one with an exponential moving average.

    For each later adapter: ``merged = merged * beta + adapter * (1 - beta)``.
    Keys containing "alpha" keep the first adapter's value.
    """
    cleanup_temp()
    login(hf_token)

    urls = [u.strip() for u in lora_urls.split(",") if u.strip()]
    paths = [
        download_file(url, hf_token, f"adapter_{i}.safetensors")
        for i, url in enumerate(urls)
    ]
    if not paths:
        return "No models found"

    base_sd = load_file(paths[0], device="cpu")
    for k in base_sd:
        if base_sd[k].dtype.is_floating_point:
            base_sd[k] = base_sd[k].float()

    for path in paths[1:]:
        print(f"Merging {path}")
        curr = load_file(path, device="cpu")
        for k in base_sd:
            if k not in curr or "alpha" in k:
                continue
            if base_sd[k].shape != curr[k].shape:
                # Adapters of different rank cannot be averaged element-wise.
                print(f"Skipping {k}: Mismatch. Base: {base_sd[k].shape}, Other: {curr[k].shape}")
                continue
            base_sd[k] = base_sd[k] * beta + curr[k].float() * (1 - beta)

    out = TempDir / "merged_adapters.safetensors"
    save_file(base_sd, out)
    api.create_repo(repo_id=output_repo, exist_ok=True, token=hf_token)
    api.upload_file(path_or_fileobj=out, path_in_repo="merged_adapters.safetensors",
                    repo_id=output_repo, token=hf_token)
    return "Done"


# =================================================================================
# TAB 4: RESIZE
# =================================================================================

def task_resize(hf_token, lora_input, new_rank, output_repo):
    """Re-factorise every LoRA pair at ``new_rank`` via SVD and upload the result.

    The stored alpha/rank scale of each pair is folded into the product before
    the SVD, and the new alpha equals the new rank, so the applied strength is
    kept. The effective rank never exceeds the number of singular values.
    """
    cleanup_temp()
    login(hf_token)

    # gr.Number delivers a float; slicing needs an int.
    new_rank = int(new_rank)
    if new_rank < 1:
        return "Error: rank must be a positive integer."

    path = download_file(lora_input, hf_token)
    state = load_file(path, device="cpu")
    new_state = {}

    print("Resizing...")
    groups = {}
    for k, tensor in state.items():
        if k.endswith(".alpha"):
            groups.setdefault(k[: -len(".alpha")], {})["alpha"] = float(tensor.item())
            continue
        group = groups.setdefault(k.split(".lora_")[0], {})
        if "lora_down" in k or "lora_A" in k:
            group["down"] = tensor
        if "lora_up" in k or "lora_B" in k:
            group["up"] = tensor

    for stem, g in tqdm(groups.items()):
        if "down" not in g or "up" not in g:
            continue
        down, up = g["down"].float(), g["up"].float()
        is_conv = down.dim() == 4

        # flatten(1) is a no-op for linear layers and folds conv kernel dims
        # into the input axis, so one matmul covers both cases.
        flat = up.flatten(1) @ down.flatten(1)
        if "alpha" in g:
            flat = flat * (g["alpha"] / down.shape[0])

        U, S, Vh = torch.linalg.svd(flat, full_matrices=False)
        r = min(new_rank, S.shape[0])
        U = U[:, :r] @ torch.diag(S[:r])
        Vh = Vh[:r, :]

        if is_conv:
            U = U.reshape(up.shape[0], r, 1, 1)
            Vh = Vh.reshape(r, *down.shape[1:])

        new_state[f"{stem}.lora_down.weight"] = Vh.contiguous()
        new_state[f"{stem}.lora_up.weight"] = U.contiguous()
        new_state[f"{stem}.alpha"] = torch.tensor(r).float()

    out = TempDir / "resized.safetensors"
    save_file(new_state, out)
    api.create_repo(repo_id=output_repo, exist_ok=True, token=hf_token)
    api.upload_file(path_or_fileobj=out, path_in_repo="resized.safetensors",
                    repo_id=output_repo, token=hf_token)
    return "Done"


# =================================================================================
# UI Construction
# =================================================================================

css = ".container { max-width: 900px; margin: auto; }"

with gr.Blocks() as demo:
    gr.Markdown("# 🧰 SOONmerge® LoRA Toolkit")

    with gr.Tabs():
        with gr.Tab("Merge (Z-Image Fix)"):
            t1_token = gr.Textbox(label="Token", type="password")
            t1_base = gr.Textbox(label="Base Repo", value="ostris/Z-Image-De-Turbo")
            t1_sub = gr.Textbox(label="Subfolder", value="transformer")
            t1_lora = gr.Textbox(label="LoRA")
            t1_scale = gr.Slider(label="Scale", value=1.0, minimum=-1, maximum=2)
            t1_out = gr.Textbox(label="Output")
            t1_struct = gr.Textbox(label="Structure Repo", value="Tongyi-MAI/Z-Image-Turbo")
            # Hidden: output repos are always created private.
            t1_private = gr.Checkbox(value=True, visible=False)
            t1_prec = gr.Dropdown(label="Precision", choices=["bf16", "fp16", "fp32"], value="bf16")
            t1_btn = gr.Button("Merge")
            t1_res = gr.Textbox(label="Result")
            t1_btn.click(
                task_merge,
                [t1_token, t1_base, t1_sub, t1_lora, t1_scale, t1_out, t1_struct,
                 t1_private, t1_prec],
                t1_res,
            )

        with gr.Tab("Extract"):
            t2_token = gr.Textbox(label="Token", type="password")
            t2_org = gr.Textbox(label="Original")
            t2_tun = gr.Textbox(label="Tuned")
            t2_rank = gr.Number(label="Rank", value=32)
            t2_out = gr.Textbox(label="Output")
            t2_btn = gr.Button("Extract")
            t2_res = gr.Textbox(label="Result")
            t2_btn.click(task_extract, [t2_token, t2_org, t2_tun, t2_rank, t2_out], t2_res)

        with gr.Tab("Merge Adapters"):
            t3_token = gr.Textbox(label="Token", type="password")
            t3_urls = gr.Textbox(label="URLs (comma sep)")
            # Beta is an averaging weight, so the slider is limited to [0, 1].
            t3_beta = gr.Slider(label="Beta", value=0.9, minimum=0.0, maximum=1.0, step=0.01)
            t3_out = gr.Textbox(label="Output")
            t3_btn = gr.Button("Merge")
            t3_res = gr.Textbox(label="Result")
            t3_btn.click(task_merge_adapters, [t3_token, t3_urls, t3_beta, t3_out], t3_res)

        with gr.Tab("Resize"):
            t4_token = gr.Textbox(label="Token", type="password")
            t4_in = gr.Textbox(label="LoRA")
            t4_rank = gr.Number(label="Rank", value=8)
            t4_out = gr.Textbox(label="Output")
            t4_btn = gr.Button("Resize")
            t4_res = gr.Textbox(label="Result")
            t4_btn.click(task_resize, [t4_token, t4_in, t4_rank, t4_out], t4_res)

if __name__ == "__main__":
    demo.queue().launch(css=css, ssr_mode=False)