"""Merge the 7B LoRA adapter into the FP16 (NOT quantized) base and push to the Hub."""
import gc
import os
import sys

import torch
from huggingface_hub import login
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


# Repository identifiers, named once so the load/push calls cannot drift apart.
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
ADAPTER_REPO = "devsomosahub/agent-os-adapter-7b"
MERGED_REPO = "devsomosahub/agent-os-7b-merged"
MAX_SHARD_SIZE = "2GB"


def read_hf_token(env=None) -> str:
    """Return the Hugging Face access token stored under ``HF_TOKEN``.

    Args:
        env: Mapping to read the token from; defaults to ``os.environ``.

    Returns:
        The token with surrounding whitespace removed.

    Raises:
        RuntimeError: If ``HF_TOKEN`` is missing, empty, or only whitespace.
    """
    source = os.environ if env is None else env
    token = (source.get("HF_TOKEN") or "").strip()
    if not token:
        raise RuntimeError("HF_TOKEN environment variable is not set or is empty")
    return token


def main(env=None) -> int:
    """Merge the LoRA adapter into the FP16 base model and push the result.

    Steps: authenticate, load the tokenizer from the adapter repo, load the
    base model in FP16 on CPU, attach the adapter, merge it, then push the
    merged model and the tokenizer to ``MERGED_REPO``.

    Args:
        env: Mapping to read ``HF_TOKEN`` from; defaults to ``os.environ``.

    Returns:
        Process exit code: 0 on success, 1 when no usable token is available.
    """
    try:
        token = read_hf_token(env)
    except RuntimeError as err:
        print(f"error: {err}", file=sys.stderr)
        return 1
    login(token=token)

    # Fetch the tokenizer before the multi-GB model so a missing or broken
    # tokenizer fails in seconds instead of after the whole merge.
    # NOTE(review): trust_remote_code=True allows code shipped in the repo to
    # run locally; kept to preserve behaviour — confirm it is still required.
    tok = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)

    print("Loading Qwen 7B FP16 on CPU...")
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="cpu",
        trust_remote_code=True,
    )

    print("Loading adapter...")
    model = PeftModel.from_pretrained(base, ADAPTER_REPO)

    print("Merging...")
    merged = model.merge_and_unload()

    print("Pushing merged 7B to Hub...")
    merged.push_to_hub(MERGED_REPO, token=token, max_shard_size=MAX_SHARD_SIZE)
    tok.push_to_hub(MERGED_REPO, token=token)
    print(f"DONE! https://huggingface.co/{MERGED_REPO}")
    return 0


if __name__ == "__main__":
    sys.exit(main())