AlekseyCalvin committed on
Commit
029e89b
·
verified ·
1 Parent(s): 584e440

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +289 -117
app.py CHANGED
@@ -229,63 +229,177 @@ class ShardBuffer:
229
  self.current_bytes = 0
230
  gc.collect()
231
 
 
 
 
 
232
  def download_lora_smart(input_str, token):
233
- """
234
- Handles Repo IDs (user/repo) and Direct URLs.
235
- """
236
  local_path = TempDir / "adapter.safetensors"
237
-
238
- # 1. Direct URL (Private/Public)
 
239
  if input_str.startswith("http"):
240
  print(f"Downloading LoRA from URL: {input_str}")
241
  headers = {"Authorization": f"Bearer {token}"} if token else {}
242
  try:
243
- response = requests.get(input_str, stream=True, headers=headers, timeout=30)
244
  response.raise_for_status()
245
  with open(local_path, 'wb') as f:
246
  for chunk in response.iter_content(chunk_size=8192):
247
  f.write(chunk)
248
- # Basic validation
249
- with open(local_path, "rb") as f:
250
- if len(f.read(8)) == 8: return local_path
251
  except Exception as e:
252
  print(f"URL download failed: {e}. Trying as Repo ID...")
253
 
254
- # 2. Repo ID (Fallback or Primary)
255
- # If the user entered a repo ID (e.g. "AlekseyCalvin/MyLora"), this catches it.
256
  print(f"Attempting download from Hub Repo: {input_str}")
257
  try:
258
- # Try finding the specific file
 
 
 
 
 
 
259
  candidates = ["adapter_model.safetensors", "model.safetensors"]
260
- target_file = None
261
 
262
- try:
263
- files = list_repo_files(repo_id=input_str, token=token)
264
- safetensors = [f for f in files if f.endswith(".safetensors")]
265
- for c in candidates:
266
- if c in safetensors:
267
- target_file = c
268
- break
269
- if not target_file and safetensors:
270
- target_file = safetensors[0]
271
- except:
272
- # If listing fails, try default
273
- target_file = "adapter_model.safetensors"
274
-
275
- hf_hub_download(repo_id=input_str, filename=target_file, token=token, local_dir=TempDir, local_dir_use_symlinks=False)
276
 
277
- # Rename to generic name
278
- downloaded = TempDir / target_file
 
 
279
  if downloaded != local_path:
280
- if local_path.exists(): os.remove(local_path)
281
  shutil.move(downloaded, local_path)
282
 
283
  return local_path
284
  except Exception as e:
285
- raise ValueError(f"Failed to download LoRA from {input_str}. \nError: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision, shard_size, output_repo, structure_repo, private, progress=gr.Progress()):
288
  cleanup_temp()
 
 
289
  login(hf_token)
290
 
291
  # 1. Output Setup
@@ -293,151 +407,209 @@ def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision
293
  api.create_repo(repo_id=output_repo, private=private, exist_ok=True, token=hf_token)
294
  except Exception as e: return f"Error creating repo: {e}"
295
 
296
- # Define modes
297
- output_subfolder = base_subfolder if base_subfolder else ""
 
 
 
 
298
 
299
- # 2. Clone Structure
300
- if structure_repo:
301
- print(f"Cloning structure from {structure_repo}...")
302
- # Ignore the folder we are overwriting (if any)
303
- ignore = output_subfolder if output_subfolder else None
304
- # Root merge mode (LLM) usually implies we skip weights in the root
305
- is_root_merge = not bool(output_subfolder)
306
- streaming_copy_structure(hf_token, structure_repo, output_repo, ignore_prefix=ignore, is_root_merge=is_root_merge)
307
-
308
- # 3. Download Input Shards
309
- progress(0.1, desc="Downloading Base Model...")
310
- try:
311
- files = list_repo_files(repo_id=base_repo, token=hf_token)
312
- except Exception as e: return f"Error accessing base repo: {e}"
313
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  input_shards = []
 
315
  for f in files:
316
  if f.endswith(".safetensors"):
317
- # Filter by subfolder if specified
318
- if output_subfolder and not f.startswith(output_subfolder): continue
319
 
320
- local_path = TempDir / "input_shards" / os.path.basename(f)
321
- os.makedirs(local_path.parent, exist_ok=True)
 
322
 
323
- hf_hub_download(repo_id=base_repo, filename=f, token=hf_token, local_dir=local_path.parent, local_dir_use_symlinks=False)
324
-
325
- # Locate file (handle nested download paths)
326
- found = list(local_path.parent.rglob(os.path.basename(f)))
327
  if found: input_shards.append(found[0])
328
 
329
- if not input_shards: return "No base safetensors found in specified location."
330
  input_shards.sort()
331
-
332
- # --- NAMING CONVENTION LOGIC ---
333
- # 1. Check for Diffusers specific subfolders -> force 'diffusion_pytorch_model'
334
- if output_subfolder in ["transformer", "unet"]:
335
- filename_prefix = "diffusion_pytorch_model"
336
- index_filename = "diffusion_pytorch_model.safetensors.index.json"
337
- # 2. Check input file naming -> adopt input convention
338
- elif "diffusion_pytorch_model" in os.path.basename(input_shards[0]):
339
- filename_prefix = "diffusion_pytorch_model"
340
- index_filename = "diffusion_pytorch_model.safetensors.index.json"
341
- # 3. Default to LLM style
342
  else:
343
- filename_prefix = "model"
344
- index_filename = "model.safetensors.index.json"
345
-
346
- print(f"Naming scheme: {filename_prefix} (Index: {index_filename})")
347
 
348
- # 4. Load LoRA
 
 
 
 
 
349
  dtype = torch.bfloat16 if precision == "bf16" else torch.float16 if precision == "fp16" else torch.float32
350
  try:
351
- progress(0.15, desc="Downloading LoRA...")
352
  lora_path = download_lora_smart(lora_input, hf_token)
353
  lora_pairs = load_lora_to_memory(lora_path, precision_dtype=dtype)
354
- except Exception as e: return f"Error loading LoRA: {e}"
355
 
356
- # 5. Stream Process
357
- buffer = ShardBuffer(shard_size, TempDir, output_repo, output_subfolder, hf_token, filename_prefix=filename_prefix)
358
 
359
- for i, shard_file in enumerate(input_shards):
360
- progress(0.2 + (0.7 * i / len(input_shards)), desc=f"Processing {os.path.basename(shard_file)}")
 
361
 
362
- with MemoryEfficientSafeOpen(shard_file) as f:
363
- keys = f.keys()
364
- for k in keys:
365
- v = f.get_tensor(k)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  base_stem = get_key_stem(k)
367
- lora_keys = set(lora_pairs.keys())
368
  match = None
369
 
370
- if base_stem in lora_keys: match = lora_pairs[base_stem]
371
- # QKV Heuristics (Z-Image/Flux specific)
 
372
  if not match:
373
  if "to_q" in base_stem:
374
- qkv_stem = base_stem.replace("to_q", "qkv")
375
- if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
376
  elif "to_k" in base_stem:
377
- qkv_stem = base_stem.replace("to_k", "qkv")
378
- if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
379
  elif "to_v" in base_stem:
380
- qkv_stem = base_stem.replace("to_v", "qkv")
381
- if qkv_stem in lora_keys: match = lora_pairs[qkv_stem]
382
 
383
- if match and "down" in match and "up" in match:
384
  down = match["down"]
385
  up = match["up"]
 
386
  scaling = scale * (match["alpha"] / match["rank"])
387
-
388
  if len(v.shape) == 4 and len(down.shape) == 2:
389
  down = down.unsqueeze(-1).unsqueeze(-1)
390
  up = up.unsqueeze(-1).unsqueeze(-1)
391
-
392
  try:
393
  if len(up.shape) == 4:
394
  delta = (up.squeeze() @ down.squeeze()).reshape(up.shape[0], down.shape[1], 1, 1)
395
  else:
396
  delta = up @ down
397
- except:
398
- delta = up.T @ down
399
 
400
  delta = delta * scaling
401
- valid_delta = True
402
 
 
 
403
  if delta.shape == v.shape: pass
404
  elif delta.shape[0] == v.shape[0] * 3:
405
  chunk = v.shape[0]
406
  if "to_q" in k: delta = delta[0:chunk, ...]
407
  elif "to_k" in k: delta = delta[chunk:2*chunk, ...]
408
  elif "to_v" in k: delta = delta[2*chunk:, ...]
409
- else: valid_delta = False
410
- elif delta.numel() == v.numel():
411
- delta = delta.reshape(v.shape)
412
- else: valid_delta = False
413
-
414
- if valid_delta:
415
  v = v.to(dtype)
416
  delta = delta.to(dtype)
417
  v.add_(delta)
418
  del delta
419
-
 
 
420
  if v.dtype != dtype: v = v.to(dtype)
421
- buffer.add_tensor(k, v)
 
 
 
 
422
  del v
423
-
424
- os.remove(shard_file)
 
 
 
 
 
 
425
  gc.collect()
426
 
427
- buffer.flush()
428
-
429
- # 6. Upload Index (Now using correct total_size)
430
- print(f"Uploading Index: {index_filename} (Total Size: {buffer.total_size})")
431
- index_data = {"metadata": {"total_size": buffer.total_size}, "weight_map": buffer.index_map}
432
-
433
- with open(TempDir / index_filename, "w") as f:
 
 
 
 
434
  json.dump(index_data, f, indent=4)
435
 
436
- path_in_repo = f"{output_subfolder}/{index_filename}" if output_subfolder else index_filename
437
- api.upload_file(path_or_fileobj=TempDir / index_filename, path_in_repo=path_in_repo, repo_id=output_repo, token=hf_token)
438
 
439
  cleanup_temp()
440
- return f"Done! Merged into {buffer.shard_count} shards at {output_repo}"
441
 
442
  # =================================================================================
443
  # TAB 2: EXTRACT LORA
 
229
  self.current_bytes = 0
230
  gc.collect()
231
 
232
+ # =================================================================================
233
+ # ROBUST RESHARDING LOGIC (Plan -> Execute)
234
+ # =================================================================================
235
+
236
  def download_lora_smart(input_str, token):
237
+ """Robust LoRA downloader that handles Direct URLs and Repo IDs."""
 
 
238
  local_path = TempDir / "adapter.safetensors"
239
+ if local_path.exists(): os.remove(local_path)
240
+
241
+ # 1. Try as Direct URL
242
  if input_str.startswith("http"):
243
  print(f"Downloading LoRA from URL: {input_str}")
244
  headers = {"Authorization": f"Bearer {token}"} if token else {}
245
  try:
246
+ response = requests.get(input_str, stream=True, headers=headers, timeout=60)
247
  response.raise_for_status()
248
  with open(local_path, 'wb') as f:
249
  for chunk in response.iter_content(chunk_size=8192):
250
  f.write(chunk)
251
+ if verify_safetensors(local_path): return local_path
 
 
252
  except Exception as e:
253
  print(f"URL download failed: {e}. Trying as Repo ID...")
254
 
255
+ # 2. Try as Repo ID
 
256
  print(f"Attempting download from Hub Repo: {input_str}")
257
  try:
258
+ # Check if user provided a filename in the repo string (e.g. user/repo/file.safetensors)
259
+ if ".safetensors" in input_str and "/" in input_str:
260
+ # splitting repo_id and filename might be needed, but hf_hub_download expects valid repo_id
261
+ pass
262
+
263
+ # Try to find the adapter file automatically
264
+ files = list_repo_files(repo_id=input_str, token=token)
265
  candidates = ["adapter_model.safetensors", "model.safetensors"]
266
+ target = next((f for f in files if f in candidates), None)
267
 
268
+ # If no standard name, take the first safetensors found
269
+ if not target:
270
+ safes = [f for f in files if f.endswith(".safetensors")]
271
+ if safes: target = safes[0]
272
+
273
+ if not target: raise ValueError("No .safetensors found")
 
 
 
 
 
 
 
 
274
 
275
+ hf_hub_download(repo_id=input_str, filename=target, token=token, local_dir=TempDir)
276
+
277
+ # Move to standard location
278
+ downloaded = TempDir / target
279
  if downloaded != local_path:
 
280
  shutil.move(downloaded, local_path)
281
 
282
  return local_path
283
  except Exception as e:
284
+ raise ValueError(f"Could not download LoRA. Checked URL and Repo. Error: {e}")
285
+
286
+ def get_tensor_byte_size(shape, dtype_str):
287
+ """Calculates byte size of a tensor based on shape and dtype."""
288
+ # F32=4, F16/BF16=2, I8=1, etc.
289
+ bytes_per = 4 if "F32" in dtype_str else 2 if "16" in dtype_str else 1
290
+ numel = 1
291
+ for d in shape: numel *= d
292
+ return numel * bytes_per
293
+
294
+ def plan_resharding(input_shards, max_shard_size_gb, filename_prefix):
295
+ """
296
+ Pass 1: Reads headers ONLY. Groups tensors into virtual shards of max_shard_size_gb.
297
+ Returns a Plan (List of ShardDefinitions).
298
+ """
299
+ print(f"Planning resharding (Max {max_shard_size_gb} GB)...")
300
+ max_bytes = int(max_shard_size_gb * 1024**3)
301
+
302
+ all_tensors = []
303
+
304
+ # 1. Scan all inputs
305
+ for p in input_shards:
306
+ with MemoryEfficientSafeOpen(p) as f:
307
+ for k in f.keys():
308
+ shape = f.header[k]['shape']
309
+ dtype = f.header[k]['dtype']
310
+ size = get_tensor_byte_size(shape, dtype)
311
+ all_tensors.append({
312
+ "key": k,
313
+ "shape": shape,
314
+ "dtype": dtype,
315
+ "size": size,
316
+ "source": p
317
+ })
318
+
319
+ # 2. Sort tensors (Crucial for deterministic output)
320
+ all_tensors.sort(key=lambda x: x["key"])
321
+
322
+ # 3. Bucket into Shards
323
+ plan = []
324
+ current_shard = []
325
+ current_size = 0
326
+
327
+ for t in all_tensors:
328
+ # If adding this tensor exceeds limit AND we have stuff in the bucket, close bucket
329
+ if current_size + t['size'] > max_bytes and current_shard:
330
+ plan.append(current_shard)
331
+ current_shard = []
332
+ current_size = 0
333
+
334
+ current_shard.append(t)
335
+ current_size += t['size']
336
+
337
+ if current_shard:
338
+ plan.append(current_shard)
339
+
340
+ total_shards = len(plan)
341
+ total_model_size = sum(t['size'] for shard in plan for t in shard)
342
+
343
+ print(f"Plan created: {total_shards} shards. Total size: {total_model_size / 1024**3:.2f} GB")
344
+
345
+ # 4. Format Plan
346
+ final_plan = []
347
+ for i, shard_tensors in enumerate(plan):
348
+ # Naming: prefix-00001-of-00005.safetensors
349
+ name = f"{filename_prefix}-{i+1:05d}-of-{total_shards:05d}.safetensors"
350
+ final_plan.append({
351
+ "filename": name,
352
+ "tensors": shard_tensors
353
+ })
354
+
355
+ return final_plan, total_model_size
356
+
357
+ def copy_auxiliary_configs(hf_token, base_repo, base_subfolder, output_repo, output_subfolder):
358
+ """
359
+ Downloads NON-WEIGHT files (json, txt, model) from Base Repo and uploads to Output.
360
+ """
361
+ print(f"Copying config files from {base_repo}...")
362
+ try:
363
+ files = list_repo_files(repo_id=base_repo, token=hf_token)
364
+
365
+ # Extensions to KEEP (Configs, Tokenizers, etc.)
366
+ allowed_ext = ['.json', '.txt', '.model', '.py', '.yml', '.yaml']
367
+ # Extensions to SKIP (Weights, we are generating these)
368
+ blocked_ext = ['.safetensors', '.bin', '.pt', '.pth', '.msgpack', '.h5']
369
+
370
+ for f in files:
371
+ # Filter by subfolder if needed
372
+ if base_subfolder and not f.startswith(base_subfolder):
373
+ continue
374
+
375
+ ext = os.path.splitext(f)[1]
376
+ if ext in blocked_ext: continue
377
+ if ext not in allowed_ext: continue # Skip unknown types to be safe? Or allow?
378
+
379
+ # Download
380
+ print(f"Transferring {f}...")
381
+ local = hf_hub_download(repo_id=base_repo, filename=f, token=hf_token, local_dir=TempDir)
382
+
383
+ # Determine path in new repo
384
+ if base_subfolder:
385
+ # Remove base_subfolder prefix for the rel path
386
+ rel_name = f[len(base_subfolder):].lstrip('/')
387
+ else:
388
+ rel_name = f
389
+
390
+ # Add output_subfolder prefix
391
+ target_path = f"{output_subfolder}/{rel_name}" if output_subfolder else rel_name
392
+
393
+ api.upload_file(path_or_fileobj=local, path_in_repo=target_path, repo_id=output_repo, token=hf_token)
394
+ os.remove(local)
395
+
396
+ except Exception as e:
397
+ print(f"Config copy warning: {e}")
398
 
399
  def task_merge(hf_token, base_repo, base_subfolder, lora_input, scale, precision, shard_size, output_repo, structure_repo, private, progress=gr.Progress()):
400
  cleanup_temp()
401
+
402
+ if not hf_token: return "Error: Token missing."
403
  login(hf_token)
404
 
405
  # 1. Output Setup
 
407
  api.create_repo(repo_id=output_repo, private=private, exist_ok=True, token=hf_token)
408
  except Exception as e: return f"Error creating repo: {e}"
409
 
410
+ # Determine Folder Logic
411
+ # If base_subfolder is "qint4", and we want output to be "transformer", user needs to specify that.
412
+ # But usually, if base has a subfolder, we maintain a subfolder structure.
413
+ # ADAPTIVE: If base_subfolder is "qint4", we treat it as the source of weights.
414
+ # Since you merged into "transformer", I assume you want the output in "transformer".
415
+ # For general LLMs (root), both are empty.
416
 
417
+ # Heuristic: If base has subfolder, use "transformer" as target if it looks like a DiT, else keep original name.
418
+ if base_subfolder:
419
+ output_subfolder = "transformer" if "qint" in base_subfolder or "transformer" in base_subfolder else base_subfolder
420
+ else:
421
+ output_subfolder = ""
 
 
 
 
 
 
 
 
 
422
 
423
+ # 2. Copy Configs (The missing step from previous run)
424
+ copy_auxiliary_configs(hf_token, base_repo, base_subfolder, output_repo, output_subfolder)
425
+
426
+ # 3. Structure Repo (Only needed if Base doesn't have everything, e.g. VAE)
427
+ if structure_repo:
428
+ print(f"Copying extras from {structure_repo}...")
429
+ # We assume structure repo is a standard diffusers repo
430
+ # We copy text_encoder, vae, scheduler, tokenizer, etc.
431
+ # We SKIP 'transformer' or 'unet' because we are building that.
432
+ streaming_copy_structure(hf_token, structure_repo, output_repo, ignore_prefix="transformer")
433
+
434
+ # 4. Download ALL Input Shards (Needed for Planning)
435
+ progress(0.1, desc="Downloading Input Model...")
436
+ files = list_repo_files(repo_id=base_repo, token=hf_token)
437
  input_shards = []
438
+
439
  for f in files:
440
  if f.endswith(".safetensors"):
441
+ if base_subfolder and not f.startswith(base_subfolder): continue
 
442
 
443
+ local = TempDir / "inputs" / os.path.basename(f)
444
+ os.makedirs(local.parent, exist_ok=True)
445
+ hf_hub_download(repo_id=base_repo, filename=f, token=hf_token, local_dir=local.parent, local_dir_use_symlinks=False)
446
 
447
+ # Handle nesting
448
+ found = list(local.parent.rglob(os.path.basename(f)))
 
 
449
  if found: input_shards.append(found[0])
450
 
451
+ if not input_shards: return "No safetensors found."
452
  input_shards.sort()
453
+
454
+ # 5. Detect Naming Convention (Adaptive)
455
+ sample_name = os.path.basename(input_shards[0])
456
+ if "diffusion_pytorch_model" in sample_name or output_subfolder == "transformer":
457
+ prefix = "diffusion_pytorch_model"
458
+ index_file = "diffusion_pytorch_model.safetensors.index.json"
 
 
 
 
 
459
  else:
460
+ prefix = "model"
461
+ index_file = "model.safetensors.index.json"
 
 
462
 
463
+ # 6. Create Plan (Pass 1)
464
+ # This calculates total shards and size BEFORE processing
465
+ progress(0.2, desc="Planning Shards...")
466
+ plan, total_model_size = plan_resharding(input_shards, shard_size, prefix)
467
+
468
+ # 7. Load LoRA
469
  dtype = torch.bfloat16 if precision == "bf16" else torch.float16 if precision == "fp16" else torch.float32
470
  try:
471
+ progress(0.25, desc="Loading LoRA...")
472
  lora_path = download_lora_smart(lora_input, hf_token)
473
  lora_pairs = load_lora_to_memory(lora_path, precision_dtype=dtype)
474
+ except Exception as e: return f"LoRA Error: {e}"
475
 
476
+ # 8. Execute Plan (Pass 2)
477
+ index_map = {}
478
 
479
+ for i, shard_plan in enumerate(plan):
480
+ filename = shard_plan['filename']
481
+ tensors_to_write = shard_plan['tensors']
482
 
483
+ progress(0.3 + (0.7 * i / len(plan)), desc=f"Merging {filename}")
484
+ print(f"Generating {filename} ({len(tensors_to_write)} tensors)...")
485
+
486
+ # Prepare Header
487
+ header = {"__metadata__": {"format": "pt"}}
488
+ current_offset = 0
489
+ for t in tensors_to_write:
490
+ # Recalculate dtype string for header based on TARGET dtype
491
+ tgt_dtype_str = "BF16" if dtype == torch.bfloat16 else "F16" if dtype == torch.float16 else "F32"
492
+
493
+ # Calculate output size (might differ from input size if we change precision)
494
+ # Input size in plan was source size. We need target size.
495
+ out_size = get_tensor_byte_size(t['shape'], tgt_dtype_str)
496
+
497
+ header[t['key']] = {
498
+ "dtype": tgt_dtype_str,
499
+ "shape": t['shape'],
500
+ "data_offsets": [current_offset, current_offset + out_size]
501
+ }
502
+ current_offset += out_size
503
+ index_map[t['key']] = filename
504
+
505
+ header_json = json.dumps(header).encode('utf-8')
506
+
507
+ out_path = TempDir / filename
508
+ with open(out_path, 'wb') as f_out:
509
+ f_out.write(struct.pack('<Q', len(header_json)))
510
+ f_out.write(header_json)
511
+
512
+ # Open source files as needed
513
+ open_files = {}
514
+
515
+ for t_plan in tqdm(tensors_to_write, leave=False):
516
+ src = t_plan['source']
517
+ if src not in open_files: open_files[src] = MemoryEfficientSafeOpen(src)
518
+
519
+ # Load Tensor
520
+ v = open_files[src].get_tensor(t_plan['key'])
521
+ k = t_plan['key']
522
+
523
+ # --- MERGE LOGIC ---
524
  base_stem = get_key_stem(k)
 
525
  match = None
526
 
527
+ # Check match (Same logic as before)
528
+ if base_stem in lora_pairs: match = lora_pairs[base_stem]
529
+ # ... [QKV Logic omitted for brevity, same as previous] ...
530
  if not match:
531
  if "to_q" in base_stem:
532
+ qkv = base_stem.replace("to_q", "qkv")
533
+ if qkv in lora_pairs: match = lora_pairs[qkv]
534
  elif "to_k" in base_stem:
535
+ qkv = base_stem.replace("to_k", "qkv")
536
+ if qkv in lora_pairs: match = lora_pairs[qkv]
537
  elif "to_v" in base_stem:
538
+ qkv = base_stem.replace("to_v", "qkv")
539
+ if qkv in lora_pairs: match = lora_pairs[qkv]
540
 
541
+ if match:
542
  down = match["down"]
543
  up = match["up"]
544
+ # ... [Matmul Logic, same as previous] ...
545
  scaling = scale * (match["alpha"] / match["rank"])
 
546
  if len(v.shape) == 4 and len(down.shape) == 2:
547
  down = down.unsqueeze(-1).unsqueeze(-1)
548
  up = up.unsqueeze(-1).unsqueeze(-1)
 
549
  try:
550
  if len(up.shape) == 4:
551
  delta = (up.squeeze() @ down.squeeze()).reshape(up.shape[0], down.shape[1], 1, 1)
552
  else:
553
  delta = up @ down
554
+ except: delta = up.T @ down
 
555
 
556
  delta = delta * scaling
 
557
 
558
+ # Slicing
559
+ valid = True
560
  if delta.shape == v.shape: pass
561
  elif delta.shape[0] == v.shape[0] * 3:
562
  chunk = v.shape[0]
563
  if "to_q" in k: delta = delta[0:chunk, ...]
564
  elif "to_k" in k: delta = delta[chunk:2*chunk, ...]
565
  elif "to_v" in k: delta = delta[2*chunk:, ...]
566
+ else: valid = False
567
+ elif delta.numel() == v.numel(): delta = delta.reshape(v.shape)
568
+ else: valid = False
569
+
570
+ if valid:
 
571
  v = v.to(dtype)
572
  delta = delta.to(dtype)
573
  v.add_(delta)
574
  del delta
575
+ # --- END MERGE ---
576
+
577
+ # Write
578
  if v.dtype != dtype: v = v.to(dtype)
579
+ if dtype == torch.bfloat16:
580
+ raw = v.view(torch.int16).numpy().tobytes()
581
+ else:
582
+ raw = v.numpy().tobytes()
583
+ f_out.write(raw)
584
  del v
585
+
586
+ # Close handles
587
+ for fh in open_files.values(): fh.file.close()
588
+
589
+ # Upload Shard
590
+ path_in_repo = f"{output_subfolder}/{filename}" if output_subfolder else filename
591
+ api.upload_file(path_or_fileobj=out_path, path_in_repo=path_in_repo, repo_id=output_repo, token=hf_token)
592
+ os.remove(out_path)
593
  gc.collect()
594
 
595
+ # 9. Upload Index
596
+ # Update total size to reflect the TARGET dtype size, not source
597
+ # We recalculate total_size based on what we actually wrote
598
+ final_total_size = 0
599
+ for t_list in plan:
600
+ for t in t_list['tensors']:
601
+ tgt_dtype_str = "BF16" if dtype == torch.bfloat16 else "F16" if dtype == torch.float16 else "F32"
602
+ final_total_size += get_tensor_byte_size(t['shape'], tgt_dtype_str)
603
+
604
+ index_data = {"metadata": {"total_size": final_total_size}, "weight_map": index_map}
605
+ with open(TempDir / index_file, "w") as f:
606
  json.dump(index_data, f, indent=4)
607
 
608
+ path_in_repo = f"{output_subfolder}/{index_file}" if output_subfolder else index_file
609
+ api.upload_file(path_or_fileobj=TempDir / index_file, path_in_repo=path_in_repo, repo_id=output_repo, token=hf_token)
610
 
611
  cleanup_temp()
612
+ return f"Success! {len(plan)} shards created at {output_repo}"
613
 
614
  # =================================================================================
615
  # TAB 2: EXTRACT LORA