Create merge_utils.py

merge_utils.py  (ADDED, +237 −0)

import gc

import torch
import yaml

# --- CRITICAL PATCH: MUST RUN BEFORE MERGEKIT IMPORTS ---
import pydantic
from pydantic import BaseModel, ConfigDict

# Force Pydantic v2 to accept arbitrary types (e.g. torch.Tensor) in every
# model defined after this point; mergekit's schemas are imported below and
# pick this config up at class-creation time.
BaseModel.model_config = ConfigDict(arbitrary_types_allowed=True)

try:
    # Standard merging
    from mergekit.config import MergeConfiguration
    from mergekit.merge import MergeOptions, run_merge

    # MoE merging
    from mergekit.moe.config import MoEMergeConfig
    from mergekit.scripts.moe import build as build_moe

    # Raw PyTorch merging
    from mergekit.graph import Executor
    from mergekit.scripts.merge_raw_pytorch import RawPyTorchMergeConfig, plan_flat_merge

except ImportError:
    print("Warning: mergekit not installed. Please install it via requirements.txt")

def execute_mergekit_config(config_dict, out_path, shard_gb, device="cpu"):
    """
    Executes a MergeKit run, detecting the config type from its keys:
    an "experts" section selects the MoE builder; anything else goes
    through the standard merge runner (linear, slerp, ties, ...).
    """
    # Free memory before starting a run
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Shared options. mergekit's MergeOptions exposes a boolean `cuda` flag
    # and an `out_shard_size` in bytes, so map our device string and shard
    # size onto those fields.
    merge_opts = MergeOptions(
        cuda=(device == "cuda"),
        copy_tokenizer=True,
        lazy_unpickle=True,
        low_cpu_memory=True,
        out_shard_size=int(shard_gb * 1024**3),
        allow_crimes=True,  # Allow mixing architectures / loose constraints
    )

    # --- BRANCH 1: MIXTURE OF EXPERTS (MoE) ---
    if "experts" in config_dict:
        print("🚀 Detected MoE Configuration.")
        try:
            # Validate against the MoE-specific schema
            conf = MoEMergeConfig.model_validate(config_dict)

            # Execute using the build function from mergekit.scripts.moe
            build_moe(
                config=conf,
                out_path=out_path,
                merge_options=merge_opts,
                load_in_4bit=False,
                load_in_8bit=False,
                device=device,
                verbose=True,
            )
            print("✅ MoE Construction Complete.")

        except Exception as e:
            raise RuntimeError(f"MoE Build Failed: {e}") from e

    # --- BRANCH 2: STANDARD MERGE (TIES, SLERP, ETC.) ---
    else:
        print("⚡ Detected Standard Merge Configuration.")
        try:
            # Validate against the standard schema
            conf = MergeConfiguration.model_validate(config_dict)

            # run_merge takes a single MergeOptions object, not loose kwargs
            run_merge(conf, out_path=out_path, options=merge_opts)
            print("✅ Standard Merge Complete.")

        except pydantic.ValidationError as e:
            raise ValueError(f"Invalid Merge Configuration: {e}") from e
        except Exception as e:
            raise RuntimeError(f"Merge Failed: {e}") from e

    gc.collect()

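# Usage sketch (hedged): a minimal TIES config dict, with placeholder model
# names, of the shape the standard-merge branch above would accept.
#
#   cfg = {
#       "merge_method": "ties",
#       "base_model": "org/base-model",
#       "dtype": "bfloat16",
#       "models": [
#           {"model": "org/model-a", "parameters": {"weight": 0.6, "density": 0.5}},
#           {"model": "org/model-b", "parameters": {"weight": 0.4, "density": 0.5}},
#       ],
#   }
#   execute_mergekit_config(cfg, "out/merged", shard_gb=5)
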
def execute_raw_pytorch(config_dict, out_path, shard_gb, device="cpu"):
    """
    Executes a raw PyTorch merge for non-transformer checkpoints
    (plain state dicts rather than Hugging Face model repos).
    """
    print("🧠 Executing Raw PyTorch Merge...")
    try:
        # Validate against the raw-merge schema
        conf = RawPyTorchMergeConfig.model_validate(config_dict)

        merge_opts = MergeOptions(
            cuda=(device == "cuda"),
            low_cpu_memory=True,
            out_shard_size=int(shard_gb * 1024**3),
            lazy_unpickle=True,
            safe_serialization=True,
        )

        # Plan the flat (non-architecture-aware) merge as a task graph
        tasks = plan_flat_merge(
            conf,
            out_path,
            tensor_union=False,
            tensor_intersection=False,
            options=merge_opts,
        )

        # Execute the graph, doing the math on the requested device while
        # keeping tensors on CPU between tasks for low-resource safety
        executor = Executor(
            tasks,
            math_device=device,
            storage_device="cpu",
        )
        executor.execute()
        print("✅ Raw PyTorch Merge Complete.")

    except Exception as e:
        raise RuntimeError(f"Raw Merge Failed: {e}") from e
    finally:
        gc.collect()

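# Usage sketch (hedged): checkpoint paths are placeholders. build_raw_config
# (defined below) produces the flat config dict this function validates.
#
#   raw_cfg = build_raw_config("linear", ["a.safetensors", "b.safetensors"],
#                              None, "float32", "0.5,0.5")
#   execute_raw_pytorch(raw_cfg, "out/merged_raw", shard_gb=5)
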
def build_full_merge_config(
    method, models, base_model, weights, density,
    dtype, tokenizer_source, layer_ranges
):
    """
    Constructs the config dictionary for general merging
    (linear, SLERP, TIES, DARE, etc.).
    """
    config = {
        "merge_method": method.lower(),
        "base_model": base_model if base_model else models[0],
        "dtype": dtype,
        "tokenizer_source": tokenizer_source,
        "models": [],
    }

    # Parse comma-separated weights; malformed input falls back to 1.0 per model
    w_list = []
    if weights:
        try:
            w_list = [float(x.strip()) for x in weights.split(',')]
        except ValueError:
            pass

    for i, m in enumerate(models):
        entry = {"model": m, "parameters": {}}

        # Method-specific parameter injection
        if method.lower() in ["ties", "dare_ties", "dare_linear"]:
            entry["parameters"]["weight"] = w_list[i] if i < len(w_list) else 1.0
            entry["parameters"]["density"] = density
        elif method.lower() in ["slerp", "linear"]:
            entry["parameters"]["weight"] = w_list[i] if i < len(w_list) else 1.0

        config["models"].append(entry)

    # mergekit's slerp method also expects a global interpolation parameter
    # `t`; use the first weight as a sensible default
    if method.lower() == "slerp":
        config["parameters"] = {"t": w_list[0] if w_list else 0.5}

    # Inject slices/layer ranges if provided (parsed as YAML)
    if layer_ranges and layer_ranges.strip():
        try:
            extra_params = yaml.safe_load(layer_ranges)
            if isinstance(extra_params, dict):
                config.update(extra_params)
        except Exception as e:
            print(f"Error parsing layer ranges YAML: {e}")

    return config

def build_moe_config(
|
| 183 |
+
base_model, experts, prompts, gate_mode, dtype,
|
| 184 |
+
tokenizer_source
|
| 185 |
+
):
|
| 186 |
+
"""
|
| 187 |
+
Constructs the YAML dictionary for MoE.
|
| 188 |
+
Maps prompts to experts if provided.
|
| 189 |
+
"""
|
| 190 |
+
config = {
|
| 191 |
+
"base_model": base_model,
|
| 192 |
+
"gate_mode": gate_mode,
|
| 193 |
+
"dtype": dtype,
|
| 194 |
+
"tokenizer_source": tokenizer_source,
|
| 195 |
+
"experts": []
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
for i, exp in enumerate(experts):
|
| 199 |
+
expert_entry = {"source_model": exp}
|
| 200 |
+
|
| 201 |
+
# Map prompt if available
|
| 202 |
+
# "positive_prompts" is required for "hidden" gate mode
|
| 203 |
+
if i < len(prompts) and prompts[i].strip():
|
| 204 |
+
expert_entry["positive_prompts"] = [prompts[i].strip()]
|
| 205 |
+
# If hidden mode is forced but no prompt, we might fail validation
|
| 206 |
+
# But we leave it to the validator to complain if strictly required
|
| 207 |
+
|
| 208 |
+
config["experts"].append(expert_entry)
|
| 209 |
+
|
| 210 |
+
return config
|
| 211 |
+
|
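# Example of the returned dict (model names and prompts are placeholders):
# build_moe_config("org/base", ["org/code", "org/chat"],
#                  ["write code", "casual chat"], "hidden", "bfloat16", "base") ->
#   {"base_model": "org/base", "gate_mode": "hidden", "dtype": "bfloat16",
#    "tokenizer_source": "base",
#    "experts": [{"source_model": "org/code", "positive_prompts": ["write code"]},
#                {"source_model": "org/chat", "positive_prompts": ["casual chat"]}]}
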
def build_raw_config(method, models, base_model, dtype, weights):
    """
    Constructs the config dictionary for raw PyTorch merging.
    """
    config = {
        "merge_method": method.lower(),
        "dtype": dtype,
        "models": [],
    }

    if base_model:
        config["base_model"] = base_model

    # Parse comma-separated weights; malformed input falls back to 1.0 per model
    w_list = []
    if weights:
        try:
            w_list = [float(x.strip()) for x in weights.split(',')]
        except ValueError:
            pass

    for i, m in enumerate(models):
        entry = {"model": m, "parameters": {}}
        # Most raw methods only use a per-model weight
        entry["parameters"]["weight"] = w_list[i] if i < len(w_list) else 1.0
        config["models"].append(entry)

    return config
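
if __name__ == "__main__":
    # Smoke test (hedged): model names are placeholders. This only exercises
    # the pure config builders and prints the resulting YAML; it does not
    # download models or run a merge.
    demo = build_full_merge_config(
        method="ties",
        models=["org/model-a", "org/model-b"],
        base_model="org/base-model",
        weights="0.6,0.4",
        density=0.5,
        dtype="bfloat16",
        tokenizer_source="base",
        layer_ranges="",
    )
    print(yaml.safe_dump(demo, sort_keys=False))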