import os
import yaml
import gc
import torch
import shutil
import subprocess
import sys
from pathlib import Path
# --- CRITICAL PATCH: MUST RUN BEFORE MERGEKIT IMPORTS ---
import pydantic
from pydantic import ConfigDict, BaseModel
# Globally relax pydantic so models may carry arbitrary (non-pydantic) field
# types without "arbitrary_types_allowed" validation errors.
# NOTE(review): this mutates the shared pydantic.BaseModel config for the
# entire process — every pydantic model anywhere is affected, not just
# mergekit's. Confirm this is intentional and still needed for the pinned
# mergekit/pydantic versions.
BaseModel.model_config = ConfigDict(arbitrary_types_allowed=True)
try:
    from mergekit.config import MergeConfiguration
    from mergekit.merge import run_merge
except ImportError:
    # Import failure is only reported here; MergeConfiguration / run_merge
    # remain undefined, so any later call raises NameError.
    print("MergeKit not found. Please install 'mergekit' in requirements.txt")
def execute_mergekit_config(config_dict, out_path, shard_gb, device="cpu"):
    """
    Execute a MergeKit run, branching on the shape of the config:

    1. MoE construction (config contains an ``experts`` key) -> shells out
       to the ``mergekit-moe`` CLI, which requires a config *file* on disk.
    2. Standard graph merges (TIES, SLERP, linear, ...) -> validates the
       config against MergeKit's strict schema and calls ``run_merge``
       in-process.

    Args:
        config_dict: MergeKit configuration as a plain dict.
        out_path: Destination directory for the merged model.
        shard_gb: Maximum shard size in gigabytes (converted to bytes).
        device: Device for the in-process merge path (default ``"cpu"``).

    Raises:
        RuntimeError: if the ``mergekit-moe`` CLI exits non-zero.
        ValueError: if a standard-merge config fails schema validation
            (e.g. missing ``merge_method``).
    """
    # Serialize once; both branches consume the YAML text.
    config_yaml = yaml.dump(config_dict, sort_keys=False)
    print("--- Generated MergeKit Config ---")
    print(config_yaml)
    print("---------------------------------")

    # Both branches express the shard limit in bytes.
    shard_bytes = int(shard_gb * 1024**3)

    # --- BRANCH 1: MIXTURE OF EXPERTS (MoE) ---
    if "experts" in config_dict:
        print("🚀 Detected MoE Configuration. Switching to 'mergekit-moe' pipeline...")
        # The CLI requires a config file, so write the YAML next to the
        # output directory. exist_ok=True makes a prior exists() check
        # redundant.
        config_path = Path(out_path).parent / "moe_config.yaml"
        os.makedirs(config_path.parent, exist_ok=True)
        with open(config_path, "w") as f:
            f.write(config_yaml)

        # NOTE(review): 'mergekit-moe' is resolved via PATH, so it must be
        # installed in the active environment. (The original comment claimed
        # sys.executable was used, but it never was.)
        cmd = [
            "mergekit-moe",
            str(config_path),
            str(out_path),
            "--shard-size", str(shard_bytes),  # bytes
            "--copy-tokenizer",
            "--trust-remote-code",
        ]
        try:
            subprocess.run(cmd, check=True)
            print("✅ MoE Construction Complete.")
        except subprocess.CalledProcessError as e:
            # Chain the original error so the exit status / traceback survive.
            raise RuntimeError(
                f"MoE Build Failed with exit code {e.returncode}. Check logs."
            ) from e
        finally:
            # Best-effort cleanup of the temporary config file.
            if config_path.exists():
                os.remove(config_path)

    # --- BRANCH 2: STANDARD MERGE (TIES, SLERP, ETC.) ---
    else:
        print("⚡ Detected Standard Merge Configuration. Using internal engine...")
        # Strict schema validation: fails fast when 'merge_method' is
        # missing, which is correct for standard merges.
        try:
            conf = MergeConfiguration.model_validate(yaml.safe_load(config_yaml))
        except pydantic.ValidationError as e:
            raise ValueError(
                f"Invalid Merge Configuration: {e}\n(Did you forget 'merge_method'?)"
            ) from e
        run_merge(
            conf,
            out_path=out_path,
            device=device,
            low_cpu_mem=True,
            copy_tokenizer=True,
            lazy_unpickle=True,
            max_shard_size=shard_bytes,
        )

    # Force cleanup after either branch; merges can leave large objects live.
    gc.collect()
def build_full_merge_config(
    method, models, base_model, weights, density,
    dtype, tokenizer_source, layer_ranges
):
    """
    Construct the MergeKit config dict for general merging
    (linear, SLERP, TIES, DARE variants, ...).

    Args:
        method: Merge method name; lower-cased before use.
        models: List of model identifiers to merge.
        base_model: Base model identifier; falls back to ``models[0]``
            when falsy.
        weights: Comma-separated per-model weights as a string (may be
            None/empty). Unparseable input logs a warning and every model
            defaults to weight 1.0.
        density: Density parameter, applied only for the TIES/DARE family.
        dtype: Output dtype string (e.g. "bfloat16").
        tokenizer_source: MergeKit tokenizer_source value.
        layer_ranges: Optional raw YAML/JSON string merged into the config
            as an override (e.g. for 'slices'); parse errors are logged
            and ignored.

    Returns:
        The config as a plain dict, ready for yaml.dump.
    """
    method_key = method.lower()  # hoisted: used in several comparisons below
    config = {
        "merge_method": method_key,
        # Fall back to the first model when no explicit base is given.
        "base_model": base_model if base_model else models[0],
        "dtype": dtype,
        "tokenizer_source": tokenizer_source,
        "models": [],
    }

    # Parse the comma-separated weight string defensively. Narrowed from a
    # bare except: ValueError covers bad floats, AttributeError covers a
    # non-string weights argument.
    w_list = []
    if weights:
        try:
            w_list = [float(x.strip()) for x in weights.split(",")]
        except (ValueError, AttributeError):
            print("Warning: Could not parse weights, defaulting to 1.0")

    # Per-model parameter injection. The original had three identical
    # branches: every listed method takes a weight; only the TIES/DARE
    # family additionally takes a density. Unknown methods get empty params.
    weighted_methods = {"ties", "dare_ties", "dare_linear", "slerp", "linear"}
    density_methods = {"ties", "dare_ties", "dare_linear"}
    for i, m in enumerate(models):
        entry = {"model": m, "parameters": {}}
        if method_key in weighted_methods:
            # Missing weights default to 1.0 per model.
            entry["parameters"]["weight"] = w_list[i] if i < len(w_list) else 1.0
        if method_key in density_methods:
            entry["parameters"]["density"] = density
        config["models"].append(entry)

    # Inject slices/layer ranges if provided (raw override merged on top).
    if layer_ranges and layer_ranges.strip():
        try:
            extra_params = yaml.safe_load(layer_ranges)
            if isinstance(extra_params, dict):
                config.update(extra_params)
        except Exception as e:
            # Best-effort: a bad override is reported, not fatal.
            print(f"Error parsing layer ranges JSON: {e}")

    return config
def build_moe_config(
    base_model, experts, gate_mode, dtype,
    tokenizer_source, positive_prompts=None
):
    """
    Construct the config dict for a Mixture-of-Experts (MoE) build.

    Note: we do NOT add 'merge_method' here because MoE configs do not use
    that field in the standard MergeKit schema.

    Args:
        base_model: Base model identifier.
        experts: List of expert model identifiers.
        gate_mode: MergeKit gate_mode value (e.g. "hidden", "random").
        dtype: Output dtype string.
        tokenizer_source: MergeKit tokenizer_source value.
        positive_prompts: Optional per-expert routing prompts; element i
            (a string or list of strings) is used for expert i, falling
            back to the "expert_i" placeholder when absent. Previously
            this argument was accepted but silently ignored; passing None
            preserves the old placeholder-only behavior.

    Returns:
        The MoE config as a plain dict, ready for yaml.dump.
    """
    config = {
        "base_model": base_model,
        "gate_mode": gate_mode,
        "dtype": dtype,
        "tokenizer_source": tokenizer_source,
        "experts": [],
    }

    for i, exp in enumerate(experts):
        prompts = None
        if positive_prompts and i < len(positive_prompts):
            prompts = positive_prompts[i]
        if prompts is None:
            # Placeholder when no prompt was supplied for this expert.
            prompts = [f"expert_{i}"]
        elif isinstance(prompts, str):
            # Allow a single string per expert; schema expects a list.
            prompts = [prompts]
        else:
            prompts = list(prompts)
        config["experts"].append({
            "source_model": exp,
            "positive_prompts": prompts,
        })

    return config