Upload folder using huggingface_hub
Browse files- adapter_config.json +31 -0
- adapter_model.safetensors +3 -0
- merge.py +133 -0
adapter_config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "openai/gpt-oss-120b",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": false,
|
| 10 |
+
"inference_mode": false,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 32,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": null,
|
| 22 |
+
"peft_type": "LORA",
|
| 23 |
+
"r": 32,
|
| 24 |
+
"rank_pattern": {},
|
| 25 |
+
"revision": null,
|
| 26 |
+
"target_modules": "all-linear",
|
| 27 |
+
"task_type": "CAUSAL_LM",
|
| 28 |
+
"trainable_token_indices": null,
|
| 29 |
+
"use_dora": false,
|
| 30 |
+
"use_rslora": false
|
| 31 |
+
}
|
adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1eadb86093bba917a6b603e1786cd7b03c0175f1ee965955cc290983f346449
|
| 3 |
+
size 5257620504
|
merge.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Merge LoRA adapter into base model weights.

Usage:
    pip install torch transformers safetensors tqdm huggingface-hub
    python merge.py --output ./merged_model

Loads the MXFP4 base model, dequantizes to bf16, applies LoRA deltas, saves merged model.
"""

import argparse
import json
import shutil
from pathlib import Path

import torch
from huggingface_hub import snapshot_download
from safetensors.torch import load_file, save_file
from tqdm import tqdm
from transformers import AutoModelForCausalLM

# Hub repo of the MXFP4-quantized base model the adapter was trained against.
BASE_MODEL = "openai/gpt-oss-120b"
# Hub repo holding the LoRA adapter (adapter_config.json + adapter_model.safetensors).
ADAPTER_REPO = "LightningRodLabs/Golf-Forecaster"
def _group_lora_pairs(adapter_weights):
    """Group flat adapter keys into ``{module_path: {"A": tensor, "B": tensor}}``.

    Strips the leading ``base_model.model.`` PEFT prefix; keys that are not
    ``lora_A``/``lora_B`` weights are ignored, matching the original behavior.
    """
    pairs = {}
    for key, tensor in adapter_weights.items():
        clean = key.replace("base_model.model.", "", 1)
        if ".lora_A.weight" in clean:
            pairs.setdefault(clean.replace(".lora_A.weight", ""), {})["A"] = tensor
        elif ".lora_B.weight" in clean:
            pairs.setdefault(clean.replace(".lora_B.weight", ""), {})["B"] = tensor
    return pairs


def _map_adapter_keys(lora_pairs):
    """Map adapter module paths to base-model state_dict keys plus a merge op.

    The adapter uses Tinker naming; HF transformers names differ:
    attn -> self_attn, w1(gate)/w3(up) -> gate_up_proj (interleaved), w2 -> down_proj.

    Returns:
        (base_key_ops, unmapped): ``base_key_ops`` maps a base state_dict key to a
        list of ``(op_type, adapter_path)`` tuples; ``unmapped`` lists adapter
        paths that matched no rule (previously these were dropped silently).
    """
    base_key_ops = {}
    unmapped = []
    for adapter_path in lora_pairs:
        if "unembed_tokens" in adapter_path:
            base_key_ops.setdefault("lm_head.weight", []).append(("add", adapter_path))
        elif ".attn." in adapter_path:
            base_key = adapter_path.replace(".attn.", ".self_attn.") + ".weight"
            base_key_ops.setdefault(base_key, []).append(("add", adapter_path))
        elif ".mlp.experts.w1" in adapter_path:
            prefix = adapter_path.split(".mlp.experts.w1")[0]
            base_key_ops.setdefault(prefix + ".mlp.experts.gate_up_proj", []).append(("even_t", adapter_path))
        elif ".mlp.experts.w3" in adapter_path:
            prefix = adapter_path.split(".mlp.experts.w3")[0]
            base_key_ops.setdefault(prefix + ".mlp.experts.gate_up_proj", []).append(("odd_t", adapter_path))
        elif ".mlp.experts.w2" in adapter_path:
            prefix = adapter_path.split(".mlp.experts.w2")[0]
            base_key_ops.setdefault(prefix + ".mlp.experts.down_proj", []).append(("add_t", adapter_path))
        else:
            unmapped.append(adapter_path)
    return base_key_ops, unmapped


def _apply_deltas(state_dict, base_key_ops, lora_pairs, scaling):
    """Add scaled ``B @ A`` deltas into ``state_dict`` in place, casting back to bf16.

    Op semantics: "add" adds the delta directly; "add_t" transposes the last two
    (expert) dims first; "even_t"/"odd_t" write the transposed delta into the
    even/odd interleaved columns of the fused gate_up_proj weight.
    """
    for base_key, ops in tqdm(sorted(base_key_ops.items()), desc="Merging LoRA"):
        w = state_dict[base_key].float()  # accumulate in fp32 to limit rounding error
        for op_type, adapter_path in ops:
            A = lora_pairs[adapter_path]["A"].float()
            B = lora_pairs[adapter_path]["B"].float()
            delta = torch.matmul(B, A) * scaling
            if op_type == "add":
                w += delta
            elif op_type == "even_t":
                w[:, :, ::2] += delta.transpose(1, 2)
            elif op_type == "odd_t":
                w[:, :, 1::2] += delta.transpose(1, 2)
            elif op_type == "add_t":
                w += delta.transpose(1, 2)
        state_dict[base_key] = w.to(torch.bfloat16)


def _save_shards(state_dict, output_dir, max_shard=5 * 1024**3):
    """Write ``state_dict`` as sharded safetensors plus an HF index file.

    Greedy first-fit sharding: a shard is flushed once adding the next tensor
    would exceed ``max_shard`` bytes. Returns the list of shard dicts so the
    caller can report the shard count.
    """
    shards, current, size = [], {}, 0
    for k, v in state_dict.items():
        nbytes = v.numel() * v.element_size()
        if size + nbytes > max_shard and current:
            shards.append(current)
            current, size = {}, 0
        current[k] = v
        size += nbytes
    if current:
        shards.append(current)

    weight_map, total = {}, 0
    for i, shard in enumerate(shards):
        fname = f"model-{i+1:05d}-of-{len(shards):05d}.safetensors"
        save_file(shard, str(output_dir / fname))
        for k, v in shard.items():
            weight_map[k] = fname
            total += v.numel() * v.element_size()

    (output_dir / "model.safetensors.index.json").write_text(
        json.dumps({"metadata": {"total_size": total}, "weight_map": weight_map}, indent=2)
    )
    return shards


def _copy_base_configs(output_dir):
    """Copy config/tokenizer files from the base repo and strip quantization info."""
    base_cache = Path(snapshot_download(BASE_MODEL, allow_patterns=["*.py", "*.json", "tokenizer*", "*.model"]))
    for f in base_cache.iterdir():
        # Keep our freshly written index rather than the base model's.
        if f.is_file() and f.name != "model.safetensors.index.json":
            shutil.copy2(f, output_dir / f.name)
    cfg = json.loads((output_dir / "config.json").read_text())
    cfg.pop("quantization_config", None)  # merged weights are plain bf16, not MXFP4
    cfg["torch_dtype"] = "bfloat16"
    (output_dir / "config.json").write_text(json.dumps(cfg, indent=2))


def merge(output_dir: str):
    """Merge the LoRA adapter from ADAPTER_REPO into BASE_MODEL and save bf16 shards.

    Args:
        output_dir: Directory to write the merged shards, the safetensors index,
            and the copied config/tokenizer files into (created if missing).
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Download adapter and derive the LoRA scaling factor (alpha / r).
    print("Downloading adapter...")
    adapter_dir = Path(snapshot_download(ADAPTER_REPO))
    adapter_config = json.loads((adapter_dir / "adapter_config.json").read_text())
    scaling = adapter_config["lora_alpha"] / adapter_config["r"]
    adapter_weights = load_file(str(adapter_dir / "adapter_model.safetensors"))
    print(f"Adapter: {len(adapter_weights)} keys, scaling={scaling}")

    # Load base model (transformers dequantizes MXFP4 to bf16 on load).
    print("Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL, torch_dtype=torch.bfloat16, device_map="cpu", trust_remote_code=True,
    )
    state_dict = base_model.state_dict()
    del base_model  # state_dict keeps the tensors alive; drop the module wrapper

    lora_pairs = _group_lora_pairs(adapter_weights)
    base_key_ops, unmapped = _map_adapter_keys(lora_pairs)
    # Fix: unmatched adapter modules used to be dropped with no signal at all.
    for path in unmapped:
        print(f"WARNING: adapter module {path!r} has no base-model mapping; skipping")

    _apply_deltas(state_dict, base_key_ops, lora_pairs, scaling)

    # Save sharded safetensors.
    print(f"Saving to {output_dir}...")
    shards = _save_shards(state_dict, output_dir)

    # Copy config + tokenizer from the base model (without quantization_config).
    _copy_base_configs(output_dir)

    print(f"Done! Merged model saved to {output_dir} ({len(shards)} shards)")
if __name__ == "__main__":
    # CLI entry point: parse the required output directory and run the merge.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--output", required=True, help="Output directory for merged model")
    cli_args = arg_parser.parse_args()
    merge(cli_args.output)