Upload scripts/merge_7b_cloud.py with huggingface_hub

04a4a4c verified 13 days ago

994 Bytes

	"""Merge 7B LoRA adapter with FP16 base (NOT quantized) and push."""
	import os, torch, gc
	from peft import PeftModel
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from huggingface_hub import login

	# Hub repository identifiers used by the steps below.
	BASE_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
	ADAPTER_REPO = "devsomosahub/agent-os-adapter-7b"
	MERGED_REPO = "devsomosahub/agent-os-7b-merged"

	# The token must be present in the environment; a missing variable raises KeyError.
	HF_TOKEN = os.environ["HF_TOKEN"]
	login(token=HF_TOKEN)

	# Options for loading the base checkpoint: float16 weights, placed on CPU.
	# NOTE(review): trust_remote_code=True permits running code shipped in the
	# Hub repo — confirm it is actually required for these repositories.
	base_load_options = {
	    "torch_dtype": torch.float16,
	    "device_map": "cpu",
	    "trust_remote_code": True,
	}

	print("Loading Qwen 7B FP16 on CPU...")
	base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, **base_load_options)

	print("Loading adapter...")
	peft_model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)

	# Fold the adapter into the base model; the returned object is what gets uploaded.
	print("Merging...")
	merged_model = peft_model.merge_and_unload()

	# The tokenizer is taken from the adapter repository, not from the base model repo.
	tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)

	# Upload the merged weights first (sharded at 2GB), then the tokenizer, to the same repo.
	print("Pushing merged 7B to Hub...")
	merged_model.push_to_hub(MERGED_REPO, token=HF_TOKEN, max_shard_size="2GB")
	tokenizer.push_to_hub(MERGED_REPO, token=HF_TOKEN)
	print("DONE! https://huggingface.co/devsomosahub/agent-os-7b-merged")