Spaces:

AlekseyCalvin
/

Soon_Merger

Running

App Files Files Community

AlekseyCalvin committed 4 days ago

Commit

5af1d7d

verified ·

1 Parent(s): 9281147

Create app.py

Browse files

Files changed (1) hide show

app.py +518 -0

app.py ADDED Viewed

	@@ -0,0 +1,518 @@

+import gradio as gr
+import torch
+import os
+import gc
+import shutil
+import requests
+import json
+import struct
+import numpy as np
+import re
+from pathlib import Path
+from typing import Dict, Any, Optional
+from huggingface_hub import HfApi, hf_hub_download, list_repo_files, login
+from safetensors.torch import load_file, save_file
+from tqdm import tqdm
+# --- Memory Efficient Safetensors ---
+class MemoryEfficientSafeOpen:
+    """
+    Reads safetensors metadata and tensors without mmap, keeping RAM usage low.
+    Essential for running on limited hardware.
+    """
+    def __init__(self, filename):
+        self.filename = filename
+        self.file = open(filename, "rb")
+        self.header, self.header_size = self._read_header()
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.file.close()
+    def keys(self) -> list[str]:
+        return [k for k in self.header.keys() if k != "__metadata__"]
+    def metadata(self) -> Dict[str, str]:
+        return self.header.get("__metadata__", {})
+    def get_tensor(self, key):
+        if key not in self.header:
+            raise KeyError(f"Tensor '{key}' not found in the file")
+        metadata = self.header[key]
+        offset_start, offset_end = metadata["data_offsets"]
+        self.file.seek(self.header_size + 8 + offset_start)
+        tensor_bytes = self.file.read(offset_end - offset_start)
+        return self._deserialize_tensor(tensor_bytes, metadata)
+    def _read_header(self):
+        header_size = struct.unpack("<Q", self.file.read(8))[0]
+        header_json = self.file.read(header_size).decode("utf-8")
+        return json.loads(header_json), header_size
+    def _deserialize_tensor(self, tensor_bytes, metadata):
+        dtype_map = {
+            "F32": torch.float32, "F16": torch.float16, "BF16": torch.bfloat16,
+            "I64": torch.int64, "I32": torch.int32, "I16": torch.int16, "I8": torch.int8,
+            "U8": torch.uint8, "BOOL": torch.bool
+        }
+        dtype = dtype_map[metadata["dtype"]]
+        shape = metadata["shape"]
+        return torch.frombuffer(tensor_bytes, dtype=torch.uint8).view(dtype).reshape(shape)
+# --- Constants & Setup ---
+TempDir = Path("./temp_tool")
+os.makedirs(TempDir, exist_ok=True)
+api = HfApi()
+def cleanup_temp():
+    if TempDir.exists():
+        shutil.rmtree(TempDir)
+    os.makedirs(TempDir, exist_ok=True)
+    gc.collect()
+def download_file(input_path, token, filename=None):
+    """Downloads a file from URL or HF Repo."""
+    local_path = TempDir / (filename if filename else "model.safetensors")
+    if input_path.startswith("http"):
+        print(f"Downloading from URL: {input_path}")
+        response = requests.get(input_path, stream=True)
+        response.raise_for_status()
+        with open(local_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+    else:
+        print(f"Downloading from Repo: {input_path}")
+        if not filename:
+            try:
+                files = list_repo_files(repo_id=input_path, token=token)
+                safetensors = [f for f in files if f.endswith(".safetensors")]
+                if safetensors:
+                    filename = safetensors[0]
+                    for f in safetensors:
+                        if "adapter" in f: filename = f
+                else:
+                    filename = "adapter_model.bin"
+            except:
+                filename = "adapter_model.safetensors"
+        hf_hub_download(repo_id=input_path, filename=filename, token=token, local_dir=TempDir, local_dir_use_symlinks=False)
+        downloaded_path = TempDir / filename
+        if downloaded_path != local_path:
+            if local_path.exists(): os.remove(local_path)
+            shutil.move(downloaded_path, local_path)
+    return local_path
+def get_key_stem(key):
+    """
+    Normalizes a key to its structural stem by removing known prefixes and suffixes.
+    matches 'layers.0.attention' with 'model.diffusion_model.layers.0.attention'.
+    """
+    key = key.replace(".weight", "").replace(".bias", "")
+    key = key.replace(".lora_down", "").replace(".lora_up", "")
+    key = key.replace(".lora_A", "").replace(".lora_B", "")
+    key = key.replace(".alpha", "")
+    prefixes = [
+        "model.diffusion_model.", "diffusion_model.", "model.",
+        "transformer.", "text_encoder.", "lora_unet_", "lora_te_",
+        "base_model.model."
+    ]
+    changed = True
+    while changed:
+        changed = False
+        for p in prefixes:
+            if key.startswith(p):
+                key = key[len(p):]
+                changed = True
+    return key
+# =================================================================================
+# TAB 1: UNIVERSAL MERGE (In-Place Memory Optimization)
+# =================================================================================
+def load_lora_to_memory(lora_path):
+    print(f"Loading LoRA from {lora_path}...")
+    state_dict = load_file(lora_path, device="cpu")
+    pairs = {}
+    alphas = {}
+    for k, v in state_dict.items():
+        stem = get_key_stem(k)
+        if "alpha" in k:
+            alphas[stem] = v.item() if isinstance(v, torch.Tensor) else v
+        else:
+            if stem not in pairs:
+                pairs[stem] = {}
+            if "lora_down" in k or "lora_A" in k:
+                pairs[stem]["down"] = v.float()
+                pairs[stem]["rank"] = v.shape[0]
+            elif "lora_up" in k or "lora_B" in k:
+                pairs[stem]["up"] = v.float()
+    for stem in pairs:
+        if stem in alphas:
+            pairs[stem]["alpha"] = alphas[stem]
+        else:
+            if "rank" in pairs[stem]:
+                pairs[stem]["alpha"] = float(pairs[stem]["rank"])
+            else:
+                pairs[stem]["alpha"] = 1.0
+    return pairs
+def merge_shard_logic(base_path, lora_pairs, scale, output_path):
+    print(f"Loading base shard: {base_path}")
+    # Load base state into RAM. This is the peak memory usage point.
+    base_state = load_file(base_path, device="cpu")
+    lora_keys = set(lora_pairs.keys())
+    keys_to_process = list(base_state.keys())
+    for k in keys_to_process:
+        v = base_state[k]
+        base_stem = get_key_stem(k)
+        match = None
+        # 1. Exact Match
+        if base_stem in lora_keys:
+            match = lora_pairs[base_stem]
+        else:
+            # 2. Heuristic Match (Z-Image QKV split)
+            if "to_q" in base_stem:
+                qkv_stem = base_stem.replace("to_q", "qkv")
+                if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
+            elif "to_k" in base_stem:
+                qkv_stem = base_stem.replace("to_k", "qkv")
+                if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
+            elif "to_v" in base_stem:
+                qkv_stem = base_stem.replace("to_v", "qkv")
+                if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
+        if match and "down" in match and "up" in match:
+            down = match["down"]
+            up = match["up"]
+            alpha = match["alpha"]
+            rank = match["rank"]
+            scaling = scale * (alpha / rank)
+            # Handle Conv 1x1 squeeze
+            if len(v.shape) == 4 and len(down.shape) == 2:
+                down = down.unsqueeze(-1).unsqueeze(-1)
+                up = up.unsqueeze(-1).unsqueeze(-1)
+            try:
+                if len(up.shape) == 4:
+                    delta = (up.squeeze() @ down.squeeze()).reshape(up.shape[0], down.shape[1], 1, 1)
+                else:
+                    delta = up @ down
+            except:
+                delta = up.T @ down
+            delta = delta * scaling
+            # --- Dynamic Reshaping / Slicing ---
+            valid_delta = True
+            if delta.shape == v.shape:
+                pass
+            elif delta.shape[0] == v.shape[0] * 3:
+                chunk_size = v.shape[0]
+                if "to_q" in k:
+                    delta = delta[0:chunk_size, ...]
+                elif "to_k" in k:
+                    delta = delta[chunk_size:2*chunk_size, ...]
+                elif "to_v" in k:
+                    delta = delta[2*chunk_size:, ...]
+                else:
+                    valid_delta = False
+            elif delta.numel() == v.numel():
+                delta = delta.reshape(v.shape)
+            else:
+                print(f"Skipping {k}: Mismatch. Base: {v.shape}, Delta: {delta.shape}")
+                valid_delta = False
+            if valid_delta:
+                # IN-PLACE MERGE to save memory
+                # 1. Promote to float32
+                # 2. Add delta
+                # 3. Cast back to original dtype
+                # 4. Replace in dict
+                orig_dtype = v.dtype
+                # Perform add in float32 to avoid overflow/precision issues
+                # Create temp float tensor
+                v_float = v.to(torch.float32)
+                v_float.add_(delta) # In-place add
+                # Cast back and replace in dict
+                base_state[k] = v_float.to(orig_dtype)
+                # Explicit cleanup
+                del v_float
+                del delta
+                # del v # v is a reference to base_state[k], which we just overwrote
+        # Periodic GC to prevent fragmentation OOM
+        if len(keys_to_process) > 100 and keys_to_process.index(k) % 50 == 0:
+            gc.collect()
+    save_file(base_state, output_path)
+    return True
+def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, output_repo, structure_repo, private, progress=gr.Progress()):
+    cleanup_temp()
+    login(hf_token)
+    try:
+        api.create_repo(repo_id=output_repo, private=private, exist_ok=True, token=hf_token)
+    except Exception as e:
+        return f"Error creating repo: {e}"
+    if structure_repo:
+        print("Cloning structure...")
+        try:
+            files = list_repo_files(repo_id=structure_repo, token=hf_token)
+            for f in files:
+                if not f.endswith(".safetensors") and not f.endswith(".bin"):
+                    try:
+                        path = hf_hub_download(repo_id=structure_repo, filename=f, token=hf_token)
+                        api.upload_file(path_or_fileobj=path, path_in_repo=f, repo_id=output_repo, token=hf_token)
+                    except: pass
+        except Exception as e:
+            print(f"Structure clone warning: {e}")
+    progress(0.1, desc="Loading LoRA...")
+    lora_path = download_file(lora_input, hf_token)
+    lora_pairs = load_lora_to_memory(lora_path)
+    files = list_repo_files(repo_id=base_repo, token=hf_token)
+    shards = [f for f in files if f.endswith(".safetensors")]
+    if base_subfolder:
+        shards = [f for f in shards if f.startswith(base_subfolder)]
+    if not shards: return "Error: No safetensors found in base."
+    for i, shard in enumerate(shards):
+        progress(0.2 + (0.8 * i/len(shards)), desc=f"Merging {shard}")
+        local_shard = hf_hub_download(repo_id=base_repo, filename=shard, token=hf_token, local_dir=TempDir)
+        merged_path = TempDir / "merged.safetensors"
+        # Merge Logic
+        merge_shard_logic(local_shard, lora_pairs, scale, merged_path)
+        # Upload
+        api.upload_file(path_or_fileobj=merged_path, path_in_repo=shard, repo_id=output_repo, token=hf_token)
+        # Cleanup immediately
+        os.remove(local_shard)
+        if merged_path.exists(): os.remove(merged_path)
+        gc.collect()
+    return f"Done! Model at https://huggingface.co/{output_repo}"
+# =================================================================================
+# TAB 2: EXTRACT LORA
+# =================================================================================
+def extract_lora_layer_by_layer(model_org, model_tuned, rank, clamp):
+    org = MemoryEfficientSafeOpen(model_org)
+    tuned = MemoryEfficientSafeOpen(model_tuned)
+    lora_sd = {}
+    print("Calculating diffs and running SVD (Layer-wise)...")
+    keys = list(org.keys())
+    for key in tqdm(keys):
+        if key not in tuned.keys(): continue
+        mat_org = org.get_tensor(key).float()
+        mat_tuned = tuned.get_tensor(key).float()
+        diff = mat_tuned - mat_org
+        if torch.max(torch.abs(diff)) < 1e-4: continue
+        out_dim, in_dim = diff.shape[:2]
+        r = min(rank, in_dim, out_dim)
+        is_conv = len(diff.shape) == 4
+        if is_conv: diff = diff.flatten(start_dim=1)
+        try:
+            U, S, Vh = torch.linalg.svd(diff, full_matrices=False)
+            U = U[:, :r]
+            S = S[:r]
+            U = U @ torch.diag(S)
+            Vh = Vh[:r, :]
+            dist = torch.cat([U.flatten(), Vh.flatten()])
+            hi_val = torch.quantile(dist, clamp)
+            U = U.clamp(-hi_val, hi_val)
+            Vh = Vh.clamp(-hi_val, hi_val)
+            if is_conv:
+                U = U.reshape(out_dim, r, 1, 1)
+                Vh = Vh.reshape(r, in_dim, mat_org.shape[2], mat_org.shape[3])
+            else:
+                U = U.reshape(out_dim, r)
+                Vh = Vh.reshape(r, in_dim)
+            stem = key.replace(".weight", "")
+            lora_sd[f"{stem}.lora_up.weight"] = U
+            lora_sd[f"{stem}.lora_down.weight"] = Vh
+            lora_sd[f"{stem}.alpha"] = torch.tensor(r).float()
+        except Exception as e:
+            print(f"SVD failed for {key}: {e}")
+    out_path = TempDir / "extracted_lora.safetensors"
+    save_file(lora_sd, out_path)
+    return str(out_path)
+def task_extract(hf_token, org_repo, tuned_repo, rank, output_repo):
+    cleanup_temp()
+    login(hf_token)
+    print("Downloading models...")
+    p1 = download_file(org_repo, hf_token, "org.safetensors")
+    p2 = download_file(tuned_repo, hf_token, "tuned.safetensors")
+    out = extract_lora_layer_by_layer(p1, p2, int(rank), 0.99)
+    api.create_repo(repo_id=output_repo, exist_ok=True, token=hf_token)
+    api.upload_file(path_or_fileobj=out, path_in_repo="extracted_lora.safetensors", repo_id=output_repo, token=hf_token)
+    return "Extraction Done."
+# =================================================================================
+# TAB 3: MERGE ADAPTERS (EMA)
+# =================================================================================
+def task_merge_adapters(hf_token, lora_urls, beta, output_repo):
+    cleanup_temp()
+    login(hf_token)
+    urls = [u.strip() for u in lora_urls.split(",") if u.strip()]
+    paths = []
+    for i, url in enumerate(urls):
+        paths.append(download_file(url, hf_token, f"adapter_{i}.safetensors"))
+    if not paths: return "No models found"
+    base_sd = load_file(paths[0], device="cpu")
+    for k in base_sd:
+        if base_sd[k].dtype.is_floating_point: base_sd[k] = base_sd[k].float()
+    for i, path in enumerate(paths[1:]):
+        print(f"Merging {path}")
+        curr = load_file(path, device="cpu")
+        for k in base_sd:
+            if k in curr and "alpha" not in k:
+                base_sd[k] = base_sd[k] * beta + curr[k].float() * (1 - beta)
+    out = TempDir / "merged_adapters.safetensors"
+    save_file(base_sd, out)
+    api.create_repo(repo_id=output_repo, exist_ok=True, token=hf_token)
+    api.upload_file(path_or_fileobj=out, path_in_repo="merged_adapters.safetensors", repo_id=output_repo, token=hf_token)
+    return "Done"
+# =================================================================================
+# TAB 4: RESIZE
+# =================================================================================
+def task_resize(hf_token, lora_input, new_rank, output_repo):
+    cleanup_temp()
+    login(hf_token)
+    path = download_file(lora_input, hf_token)
+    state = load_file(path, device="cpu")
+    new_state = {}
+    print("Resizing...")
+    groups = {}
+    for k in state:
+        stem = get_key_stem(k)
+        stem_simple = k.split(".lora_")[0]
+        if stem_simple not in groups: groups[stem_simple] = {}
+        if "lora_down" in k or "lora_A" in k: groups[stem_simple]["down"] = state[k]
+        if "lora_up" in k or "lora_B" in k: groups[stem_simple]["up"] = state[k]
+    for stem, g in tqdm(groups.items()):
+        if "down" in g and "up" in g:
+            down, up = g["down"].float(), g["up"].float()
+            if len(down.shape) == 4:
+                merged = (up.squeeze() @ down.squeeze()).reshape(up.shape[0], down.shape[1], down.shape[2], down.shape[3])
+                flat = merged.flatten(1)
+            else:
+                merged = up @ down
+                flat = merged
+            U, S, Vh = torch.linalg.svd(flat, full_matrices=False)
+            U = U[:, :new_rank]
+            S = S[:new_rank]
+            U = U @ torch.diag(S)
+            Vh = Vh[:new_rank, :]
+            if len(down.shape) == 4:
+                U = U.reshape(up.shape[0], new_rank, 1, 1)
+                Vh = Vh.reshape(new_rank, down.shape[1], down.shape[2], down.shape[3])
+            new_state[f"{stem}.lora_down.weight"] = Vh
+            new_state[f"{stem}.lora_up.weight"] = U
+            new_state[f"{stem}.alpha"] = torch.tensor(new_rank).float()
+    out = TempDir / "resized.safetensors"
+    save_file(new_state, out)
+    api.create_repo(repo_id=output_repo, exist_ok=True, token=hf_token)
+    api.upload_file(path_or_fileobj=out, path_in_repo="resized.safetensors", repo_id=output_repo, token=hf_token)
+    return "Done"
+# =================================================================================
+# UI Construction
+# =================================================================================
+css = ".container { max-width: 900px; margin: auto; }"  # stylesheet string handed to launch() at the bottom
+with gr.Blocks() as demo:  # one tab per task function defined above
+    gr.Markdown("# 🧰 SOONmerge® LoRA Toolkit")
+    with gr.Tabs():
+        with gr.Tab("Merge (Z-Image Fix)"):  # inputs for task_merge
+            t1_token = gr.Textbox(label="Token", type="password")
+            t1_base = gr.Textbox(label="Base Repo", value="ostris/Z-Image-De-Turbo")
+            t1_sub = gr.Textbox(label="Subfolder", value="transformer")
+            t1_lora = gr.Textbox(label="LoRA")
+            t1_scale = gr.Slider(label="Scale", value=1.0, minimum=-1, maximum=2)  # multiplies the LoRA delta, so negative values subtract it
+            t1_out = gr.Textbox(label="Output")
+            t1_struct = gr.Textbox(label="Structure Repo", value="Tongyi-MAI/Z-Image-Turbo")
+            t1_btn = gr.Button("Merge")
+            t1_res = gr.Textbox(label="Result")
+            t1_btn.click(task_merge, [t1_token, t1_base, t1_sub, t1_lora, t1_scale, t1_out, t1_struct, gr.Checkbox(value=True, visible=False)], t1_res)  # the hidden checkbox supplies task_merge's `private` argument (True)
+        with gr.Tab("Extract"):  # inputs for task_extract
+            t2_token = gr.Textbox(label="Token", type="password")
+            t2_org = gr.Textbox(label="Original")
+            t2_tun = gr.Textbox(label="Tuned")
+            t2_rank = gr.Number(label="Rank", value=32)  # task_extract converts this with int()
+            t2_out = gr.Textbox(label="Output")
+            t2_btn = gr.Button("Extract")
+            t2_res = gr.Textbox(label="Result")
+            t2_btn.click(task_extract, [t2_token, t2_org, t2_tun, t2_rank, t2_out], t2_res)
+        with gr.Tab("Merge Adapters"):  # inputs for task_merge_adapters
+            t3_token = gr.Textbox(label="Token", type="password")
+            t3_urls = gr.Textbox(label="URLs (comma sep)")
+            t3_beta = gr.Slider(label="Beta", value=0.9)  # weight kept from the running merge; each further adapter gets (1 - beta)
+            t3_out = gr.Textbox(label="Output")
+            t3_btn = gr.Button("Merge")
+            t3_res = gr.Textbox(label="Result")
+            t3_btn.click(task_merge_adapters, [t3_token, t3_urls, t3_beta, t3_out], t3_res)
+        with gr.Tab("Resize"):  # inputs for task_resize
+            t4_token = gr.Textbox(label="Token", type="password")
+            t4_in = gr.Textbox(label="LoRA")
+            t4_rank = gr.Number(label="Rank", value=8)  # passed to task_resize as new_rank
+            t4_out = gr.Textbox(label="Output")
+            t4_btn = gr.Button("Resize")
+            t4_res = gr.Textbox(label="Result")
+            t4_btn.click(task_resize, [t4_token, t4_in, t4_rank, t4_out], t4_res)
+if __name__ == "__main__":  # launch only when run as a script
+    demo.queue().launch(css=css, ssr_mode=False)  # css is supplied at launch time here, not to gr.Blocks()