File size: 994 Bytes
04a4a4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
"""Merge 7B LoRA adapter with FP16 base (NOT quantized) and push."""
import os, torch, gc
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

# --- Configuration ---------------------------------------------------------
# Repo ids are named once so the load, push and final URL cannot drift apart.
BASE_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
ADAPTER_REPO_ID = "devsomosahub/agent-os-adapter-7b"
MERGED_REPO_ID = "devsomosahub/agent-os-7b-merged"

# The Hub token is read from the environment. Using .get() plus a truthiness
# check turns both "unset" and "set but empty" into a readable error instead
# of a bare KeyError traceback or a login attempt with an empty token.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise SystemExit("HF_TOKEN environment variable is not set or is empty.")
login(token=HF_TOKEN)

# Fetch the tokenizer first: it is the cheap step, so if the adapter repo has
# no tokenizer files the script fails before the expensive base-model load
# and merge rather than after them.
tok = AutoTokenizer.from_pretrained(ADAPTER_REPO_ID, trust_remote_code=True)

# Load the un-quantized base weights in half precision, placed on the CPU.
print("Loading Qwen 7B FP16 on CPU...")
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    torch_dtype=torch.float16,
    device_map="cpu",
    trust_remote_code=True,
)

# Attach the LoRA adapter on top of the base model.
print("Loading adapter...")
model = PeftModel.from_pretrained(base, ADAPTER_REPO_ID)

# Fold the adapter into the base weights; per the PEFT docs this returns the
# underlying model without the PEFT wrapper, ready for a plain push.
print("Merging...")
merged = model.merge_and_unload()

# Upload model (in shards of at most 2GB) and tokenizer to the same repo.
print("Pushing merged 7B to Hub...")
merged.push_to_hub(MERGED_REPO_ID, token=HF_TOKEN, max_shard_size="2GB")
tok.push_to_hub(MERGED_REPO_ID, token=HF_TOKEN)
print(f"DONE! https://huggingface.co/{MERGED_REPO_ID}")