Spaces:

AlekseyCalvin
/

Soon_Merger

Running

App Files Files Community

AlekseyCalvin committed on Dec 14, 2025

Commit

4419bdb

verified ·

1 Parent(s): a690cfc

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -100

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import gradio as gr
 import torch
 import os
@@ -154,14 +156,15 @@ class ShardBuffer:
         self.output_repo = output_repo
         self.subfolder = subfolder
         self.hf_token = hf_token
-        self.filename_prefix = filename_prefix  # Dynamic prefix (e.g. 'diffusion_pytorch_model' or 'model')
         self.buffer = []
         self.current_bytes = 0
         self.shard_count = 0
         self.index_map = {}
-        self.total_model_size = 0
     def add_tensor(self, key, tensor):
         if tensor.dtype == torch.bfloat16:
             raw_bytes = tensor.view(torch.int16).numpy().tobytes()
             dtype_str = "BF16"
@@ -173,14 +176,16 @@ class ShardBuffer:
             dtype_str = "F32"
         size = len(raw_bytes)
         self.buffer.append({
             "key": key,
             "data": raw_bytes,
             "dtype": dtype_str,
             "shape": tensor.shape
         })
         self.current_bytes += size
-        self.total_model_size += size
         if self.current_bytes >= self.max_bytes:
             self.flush()
@@ -189,7 +194,8 @@ class ShardBuffer:
         if not self.buffer: return
         self.shard_count += 1
-        # ADAPTIVE NAMING: Uses the prefix detected from the base model
         filename = f"{self.filename_prefix}-{self.shard_count:05d}.safetensors"
         # Proper Subfolder Handling
@@ -206,7 +212,7 @@ class ShardBuffer:
                 "data_offsets": [current_offset, current_offset + len(item["data"])]
             }
             current_offset += len(item["data"])
-            self.index_map[item["key"]] = filename
         header_json = json.dumps(header).encode('utf-8')
@@ -225,44 +231,60 @@ class ShardBuffer:
         self.current_bytes = 0
         gc.collect()
-def streaming_copy_structure(token, src_repo, dst_repo, ignore_prefix="transformer"):
     """
-    Copies files one-by-one from source to dest, skipping 'ignore_prefix'.
-    Does NOT skip .safetensors/.bin if they are outside the ignore folder.
     """
-    print(f"Scanning {src_repo} for auxiliary files...")
     try:
-        files = api.list_repo_files(repo_id=src_repo, token=token)
-        for f in tqdm(files, desc="Copying Structure"):
-            # 1. Skip the folder we are replacing (e.g., transformer/)
-            if ignore_prefix and f.startswith(ignore_prefix):
-                continue
-            # 2. Skip hidden/system files
-            if f.startswith("."):
-                continue
-            # 3. Download -> Upload -> Delete loop
-            # This ensures we get VAE/TextEnc weights without disk overflow
-            try:
-                print(f"Copying {f}...")
-                local = hf_hub_download(repo_id=src_repo, filename=f, token=token, local_dir=TempDir)
-                api.upload_file(
-                    path_or_fileobj=local,
-                    path_in_repo=f,
-                    repo_id=dst_repo,
-                    token=token
-                )
-                if os.path.exists(local):
-                    os.remove(local)
-            except Exception as e:
-                print(f"Failed to copy {f}: {e}")
     except Exception as e:
-        print(f"Structure cloning error: {e}")
 def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision, shard_size, output_repo, structure_repo, private, progress=gr.Progress()):
     cleanup_temp()
@@ -273,62 +295,73 @@ def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision
         api.create_repo(repo_id=output_repo, private=private, exist_ok=True, token=hf_token)
     except Exception as e: return f"Error creating repo: {e}"
-    # 2. Server-Side Structure Clone
     if structure_repo:
-        ignore = base_subfolder if base_subfolder else None
-        streaming_copy_structure(hf_token, structure_repo, output_repo, ignore)
-    # 3. Load LoRA
-    dtype = torch.bfloat16 if precision == "bf16" else torch.float16 if precision == "fp16" else torch.float32
     try:
-        progress(0.1, desc="Downloading LoRA...")
-        lora_path = download_file(lora_input, hf_token, filename="adapter.safetensors")
-        lora_pairs = load_lora_to_memory(lora_path, precision_dtype=dtype)
-    except Exception as e: return f"Error loading LoRA: {e}"
-    # 4. Stream Process
-    progress(0.2, desc="Fetching File List...")
-    files = list_repo_files(repo_id=base_repo, token=hf_token)
-    # Identify valid shards in the target folder
     input_shards = []
     for f in files:
-        if not f.endswith(".safetensors"): continue
-        if base_subfolder and not f.startswith(base_subfolder): continue
-        input_shards.append(f)
     if not input_shards: return "No base safetensors found in specified location."
     input_shards.sort()
-    # --- AUTO-DETECT NAMING CONVENTION ---
-    # We look at the first file to decide the naming scheme.
-    # Common schemes:
-    #   "diffusion_pytorch_model-00001..." -> prefix: "diffusion_pytorch_model"
-    #   "model-00001..." -> prefix: "model"
-    #   "model.safetensors" -> prefix: "model"
-    first_file = os.path.basename(input_shards[0])
-    if first_file.startswith("diffusion_pytorch_model"):
         filename_prefix = "diffusion_pytorch_model"
         index_filename = "diffusion_pytorch_model.safetensors.index.json"
     else:
-        # Default for LLMs, Text Encoders, etc.
         filename_prefix = "model"
         index_filename = "model.safetensors.index.json"
-    print(f"Detected naming convention: {filename_prefix} (Index: {index_filename})")
-    # Initialize Buffer with detected prefix
-    buffer = ShardBuffer(shard_size, TempDir, output_repo, base_subfolder, hf_token, filename_prefix=filename_prefix)
     for i, shard_file in enumerate(input_shards):
-        progress(0.2 + (0.7 * i / len(input_shards)), desc=f"Processing {shard_file}")
-        local_shard = hf_hub_download(repo_id=base_repo, filename=shard_file, token=hf_token, local_dir=TempDir)
-        with MemoryEfficientSafeOpen(local_shard) as f:
             keys = f.keys()
             for k in keys:
                 v = f.get_tensor(k)
@@ -336,28 +369,24 @@ def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision
                 lora_keys = set(lora_pairs.keys())
                 match = None
-                # Matching Logic (Exact + Heuristic for QKV)
-                if base_stem in lora_keys:
-                    match = lora_pairs[base_stem]
-                else:
-                    if "to_q" in base_stem:
                         qkv_stem = base_stem.replace("to_q", "qkv")
                         if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
-                    elif "to_k" in base_stem:
                         qkv_stem = base_stem.replace("to_k", "qkv")
                         if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
-                    elif "to_v" in base_stem:
                         qkv_stem = base_stem.replace("to_v", "qkv")
                         if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
                 if match and "down" in match and "up" in match:
                     down = match["down"]
                     up = match["up"]
-                    alpha = match["alpha"]
-                    rank = match["rank"]
-                    scaling = scale * (alpha / rank)
-                    # Handle Conv 1x1 squeeze
                     if len(v.shape) == 4 and len(down.shape) == 2:
                         down = down.unsqueeze(-1).unsqueeze(-1)
                         up = up.unsqueeze(-1).unsqueeze(-1)
@@ -373,9 +402,7 @@ def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision
                     delta = delta * scaling
                     valid_delta = True
-                    # Shape Slicing Logic
-                    if delta.shape == v.shape:
-                        pass
                     elif delta.shape[0] == v.shape[0] * 3:
                         chunk = v.shape[0]
                         if "to_q" in k: delta = delta[0:chunk, ...]
@@ -384,8 +411,7 @@ def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision
                         else: valid_delta = False
                     elif delta.numel() == v.numel():
                         delta = delta.reshape(v.shape)
-                    else:
-                        valid_delta = False
                     if valid_delta:
                         v = v.to(dtype)
@@ -397,23 +423,19 @@ def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision
                 buffer.add_tensor(k, v)
                 del v
-        os.remove(local_shard)
         gc.collect()
     buffer.flush()
-    # Upload Index (Using the dynamically determined index filename)
-    print(f"Uploading Index: {index_filename}")
-    index_data = {
-        "metadata": {"total_size": buffer.total_model_size},
-        "weight_map": buffer.index_map
-    }
     with open(TempDir / index_filename, "w") as f:
         json.dump(index_data, f, indent=4)
-    path_in_repo = f"{base_subfolder}/{index_filename}" if base_subfolder else index_filename
     api.upload_file(path_or_fileobj=TempDir / index_filename, path_in_repo=path_in_repo, repo_id=output_repo, token=hf_token)
     cleanup_temp()

+MERGE APP EDIT:
 import gradio as gr
 import torch
 import os
         self.output_repo = output_repo
         self.subfolder = subfolder
         self.hf_token = hf_token
+        self.filename_prefix = filename_prefix
         self.buffer = []
         self.current_bytes = 0
         self.shard_count = 0
         self.index_map = {}
+        self.total_size = 0  # Accumulates total model size for index.json
     def add_tensor(self, key, tensor):
+        # Determine bytes for size calculation and storage
         if tensor.dtype == torch.bfloat16:
             raw_bytes = tensor.view(torch.int16).numpy().tobytes()
             dtype_str = "BF16"
             dtype_str = "F32"
         size = len(raw_bytes)
         self.buffer.append({
             "key": key,
             "data": raw_bytes,
             "dtype": dtype_str,
             "shape": tensor.shape
         })
         self.current_bytes += size
+        self.total_size += size  # Explicitly increment total size
         if self.current_bytes >= self.max_bytes:
             self.flush()
         if not self.buffer: return
         self.shard_count += 1
+        # Naming: prefix-0000X.safetensors
+        # This is standard for indexed loading.
         filename = f"{self.filename_prefix}-{self.shard_count:05d}.safetensors"
         # Proper Subfolder Handling
                 "data_offsets": [current_offset, current_offset + len(item["data"])]
             }
             current_offset += len(item["data"])
+            self.index_map[item["key"]] = filename # Relative filename for index
         header_json = json.dumps(header).encode('utf-8')
         self.current_bytes = 0
         gc.collect()
def download_lora_smart(input_str, token):
    """
    Fetch a LoRA adapter and store it as ``TempDir / "adapter.safetensors"``.

    Handles Repo IDs (user/repo) and Direct URLs. A URL is tried first; if
    that download fails or produces a file shorter than 8 bytes, the same
    string is retried as a Hub repo ID. For a repo, the first of
    ``adapter_model.safetensors`` / ``model.safetensors`` that is listed is
    used, otherwise the first ``.safetensors`` file in the listing.

    Args:
        input_str: Direct http(s) URL or Hub repo ID of the adapter.
        token: Hub access token; a falsy value means anonymous access.

    Returns:
        Path of the downloaded adapter file.

    Raises:
        ValueError: if ``input_str`` is empty, or if the adapter could not be
            downloaded from either source (the underlying error is chained).
    """
    # Fail fast with a clear message instead of an AttributeError / obscure
    # Hub error further down.
    if not isinstance(input_str, str) or not input_str.strip():
        raise ValueError("No LoRA source given: expected a URL or a Hub repo ID.")
    input_str = input_str.strip()

    local_path = TempDir / "adapter.safetensors"

    # 1. Direct URL (Private/Public)
    # Match the full scheme so repo IDs that merely start with "http" are
    # not mistaken for URLs.
    if input_str.startswith(("http://", "https://")):
        from urllib.parse import urlparse  # stdlib; local to keep the block self-contained

        print(f"Downloading LoRA from URL: {input_str}")
        parsed = urlparse(input_str)
        host = (parsed.hostname or "").lower()
        # SECURITY: the URL is user-supplied. Only attach the Hub token when
        # talking to the Hub itself over TLS; never hand it to other hosts.
        is_hub_host = host in ("huggingface.co", "hf.co") or host.endswith(
            (".huggingface.co", ".hf.co")
        )
        send_token = bool(token) and is_hub_host and parsed.scheme == "https"
        headers = {"Authorization": f"Bearer {token}"} if send_token else {}
        try:
            # Context manager releases the streamed connection even on error.
            with requests.get(input_str, stream=True, headers=headers, timeout=30) as response:
                response.raise_for_status()
                with open(local_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            # Basic validation: reject files shorter than 8 bytes.
            if os.path.getsize(local_path) >= 8:
                return local_path
            print("URL download failed: downloaded file is too small. Trying as Repo ID...")
        except Exception as e:
            # Deliberate best-effort: any URL failure falls back to the repo path.
            print(f"URL download failed: {e}. Trying as Repo ID...")

    # 2. Repo ID (Fallback or Primary)
    # If the user entered a repo ID (e.g. "AlekseyCalvin/MyLora"), this catches it.
    print(f"Attempting download from Hub Repo: {input_str}")
    try:
        candidates = ["adapter_model.safetensors", "model.safetensors"]
        try:
            files = list_repo_files(repo_id=input_str, token=token)
        except Exception:
            # If listing fails, try the default adapter filename.
            target_file = "adapter_model.safetensors"
        else:
            safetensors = [f for f in files if f.endswith(".safetensors")]
            target_file = next((c for c in candidates if c in safetensors), None)
            if target_file is None:
                if not safetensors:
                    # Previously this passed filename=None to hf_hub_download.
                    raise FileNotFoundError("repo contains no .safetensors files")
                target_file = safetensors[0]

        hf_hub_download(repo_id=input_str, filename=target_file, token=token, local_dir=TempDir, local_dir_use_symlinks=False)

        # Rename to generic name
        downloaded = TempDir / target_file
        if downloaded != local_path:
            if local_path.exists():
                os.remove(local_path)
            shutil.move(downloaded, local_path)
        return local_path
    except Exception as e:
        raise ValueError(f"Failed to download LoRA from {input_str}. \nError: {e}") from e
 def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision, shard_size, output_repo, structure_repo, private, progress=gr.Progress()):
     cleanup_temp()
         api.create_repo(repo_id=output_repo, private=private, exist_ok=True, token=hf_token)
     except Exception as e: return f"Error creating repo: {e}"
+    # Define modes
+    output_subfolder = base_subfolder if base_subfolder else ""
+    # 2. Clone Structure
     if structure_repo:
+        print(f"Cloning structure from {structure_repo}...")
+        # Ignore the folder we are overwriting (if any)
+        ignore = output_subfolder if output_subfolder else None
+        # Root merge mode (LLM) usually implies we skip weights in the root
+        is_root_merge = not bool(output_subfolder)
+        streaming_copy_structure(hf_token, structure_repo, output_repo, ignore_prefix=ignore, is_root_merge=is_root_merge)
+    # 3. Download Input Shards
+    progress(0.1, desc="Downloading Base Model...")
     try:
+        files = list_repo_files(repo_id=base_repo, token=hf_token)
+    except Exception as e: return f"Error accessing base repo: {e}"
     input_shards = []
     for f in files:
+        if f.endswith(".safetensors"):
+            # Filter by subfolder if specified
+            if output_subfolder and not f.startswith(output_subfolder): continue
+            local_path = TempDir / "input_shards" / os.path.basename(f)
+            os.makedirs(local_path.parent, exist_ok=True)
+            hf_hub_download(repo_id=base_repo, filename=f, token=hf_token, local_dir=local_path.parent, local_dir_use_symlinks=False)
+            # Locate file (handle nested download paths)
+            found = list(local_path.parent.rglob(os.path.basename(f)))
+            if found: input_shards.append(found[0])
     if not input_shards: return "No base safetensors found in specified location."
     input_shards.sort()
+    # --- NAMING CONVENTION LOGIC ---
+    # 1. Check for Diffusers specific subfolders -> force 'diffusion_pytorch_model'
+    if output_subfolder in ["transformer", "unet"]:
         filename_prefix = "diffusion_pytorch_model"
         index_filename = "diffusion_pytorch_model.safetensors.index.json"
+    # 2. Check input file naming -> adopt input convention
+    elif "diffusion_pytorch_model" in os.path.basename(input_shards[0]):
+        filename_prefix = "diffusion_pytorch_model"
+        index_filename = "diffusion_pytorch_model.safetensors.index.json"
+    # 3. Default to LLM style
     else:
         filename_prefix = "model"
         index_filename = "model.safetensors.index.json"
+    print(f"Naming scheme: {filename_prefix} (Index: {index_filename})")
+    # 4. Load LoRA
+    dtype = torch.bfloat16 if precision == "bf16" else torch.float16 if precision == "fp16" else torch.float32
+    try:
+        progress(0.15, desc="Downloading LoRA...")
+        lora_path = download_lora_smart(lora_input, hf_token)
+        lora_pairs = load_lora_to_memory(lora_path, precision_dtype=dtype)
+    except Exception as e: return f"Error loading LoRA: {e}"
+    # 5. Stream Process
+    buffer = ShardBuffer(shard_size, TempDir, output_repo, output_subfolder, hf_token, filename_prefix=filename_prefix)
     for i, shard_file in enumerate(input_shards):
+        progress(0.2 + (0.7 * i / len(input_shards)), desc=f"Processing {os.path.basename(shard_file)}")
+        with MemoryEfficientSafeOpen(shard_file) as f:
             keys = f.keys()
             for k in keys:
                 v = f.get_tensor(k)
                 lora_keys = set(lora_pairs.keys())
                 match = None
+                if base_stem in lora_keys: match = lora_pairs[base_stem]
+                # QKV Heuristics (Z-Image/Flux specific)
+                if not match:
+                     if "to_q" in base_stem:
                         qkv_stem = base_stem.replace("to_q", "qkv")
                         if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
+                     elif "to_k" in base_stem:
                         qkv_stem = base_stem.replace("to_k", "qkv")
                         if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
+                     elif "to_v" in base_stem:
                         qkv_stem = base_stem.replace("to_v", "qkv")
                         if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
                 if match and "down" in match and "up" in match:
                     down = match["down"]
                     up = match["up"]
+                    scaling = scale * (match["alpha"] / match["rank"])
                     if len(v.shape) == 4 and len(down.shape) == 2:
                         down = down.unsqueeze(-1).unsqueeze(-1)
                         up = up.unsqueeze(-1).unsqueeze(-1)
                     delta = delta * scaling
                     valid_delta = True
+                    if delta.shape == v.shape: pass
                     elif delta.shape[0] == v.shape[0] * 3:
                         chunk = v.shape[0]
                         if "to_q" in k: delta = delta[0:chunk, ...]
                         else: valid_delta = False
                     elif delta.numel() == v.numel():
                         delta = delta.reshape(v.shape)
+                    else: valid_delta = False
                     if valid_delta:
                         v = v.to(dtype)
                 buffer.add_tensor(k, v)
                 del v
+        os.remove(shard_file)
         gc.collect()
     buffer.flush()
+    # 6. Upload Index (Now using correct total_size)
+    print(f"Uploading Index: {index_filename} (Total Size: {buffer.total_size})")
+    index_data = {"metadata": {"total_size": buffer.total_size}, "weight_map": buffer.index_map}
     with open(TempDir / index_filename, "w") as f:
         json.dump(index_data, f, indent=4)
+    path_in_repo = f"{output_subfolder}/{index_filename}" if output_subfolder else index_filename
     api.upload_file(path_or_fileobj=TempDir / index_filename, path_in_repo=path_in_repo, repo_id=output_repo, token=hf_token)
     cleanup_temp()