sunkencity committed on
Commit
5d36f92
·
verified ·
1 Parent(s): 5199dbe

Upload convert_survival_32b_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_survival_32b_gguf.py +83 -0
convert_survival_32b_gguf.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # /// script
3
+ # dependencies = ["peft", "transformers", "torch", "huggingface_hub", "sentencepiece", "cmake"]
4
+ # ///
5
+
6
+ import os
7
+ import subprocess
8
+ from peft import PeftModel
9
+ from transformers import AutoModelForCausalLM, AutoTokenizer
10
+ from huggingface_hub import HfApi, create_repo
11
+ import torch
12
+
13
+ # Configuration
14
+ BASE_MODEL_ID = "Qwen/Qwen2.5-32B-Instruct"
15
+ ADAPTER_ID = "sunkencity/survival-expert-qwen-32b"
16
+ OUTPUT_REPO = "sunkencity/survival-expert-qwen-32b-gguf"
17
+ MERGED_DIR = "merged_model"
18
+ GGUF_FILE = "survival-expert-qwen-32b.Q4_K_M.gguf"
19
+
20
+ print(f"Loading base model: {BASE_MODEL_ID}")
21
+ # Load in bfloat16 to save memory and match training
22
+ base_model = AutoModelForCausalLM.from_pretrained(
23
+ BASE_MODEL_ID,
24
+ device_map="auto",
25
+ torch_dtype=torch.bfloat16,
26
+ trust_remote_code=True
27
+ )
28
+
29
+ print(f"Loading adapter: {ADAPTER_ID}")
30
+ model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
31
+
32
+ print("Merging model...")
33
+ model = model.merge_and_unload()
34
+
35
+ print(f"Saving merged model to {MERGED_DIR}...")
36
+ model.save_pretrained(MERGED_DIR)
37
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
38
+ tokenizer.save_pretrained(MERGED_DIR)
39
+
40
+ print("Cloning llama.cpp...")
41
+ if os.path.exists("llama.cpp"):
42
+ subprocess.run(["rm", "-rf", "llama.cpp"])
43
+ subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp"], check=True)
44
+
45
+ print("Installing llama.cpp requirements...")
46
+ subprocess.run(["pip", "install", "-r", "llama.cpp/requirements.txt"], check=True)
47
+
48
+ print("Building llama-quantize with CMake...")
49
+ os.makedirs("llama.cpp/build", exist_ok=True)
50
+ subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp"], check=True)
51
+ subprocess.run(["cmake", "--build", "llama.cpp/build", "--config", "Release", "-j"], check=True)
52
+
53
+ print("Converting to GGUF (FP16)...")
54
+ # Convert to GGUF (BF16/FP16 preserved from model)
55
+ subprocess.run([
56
+ "python", "llama.cpp/convert_hf_to_gguf.py",
57
+ MERGED_DIR,
58
+ "--outfile", "merged_model.gguf",
59
+ "--outtype", "bf16"
60
+ ], check=True)
61
+
62
+ print("Quantizing to Q4_K_M...")
63
+ quantize_bin = "llama.cpp/build/bin/llama-quantize"
64
+ subprocess.run([
65
+ quantize_bin,
66
+ "merged_model.gguf",
67
+ GGUF_FILE,
68
+ "Q4_K_M"
69
+ ], check=True)
70
+
71
+ print(f"Creating repo {OUTPUT_REPO}...")
72
+ api = HfApi()
73
+ create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
74
+
75
+ print(f"Uploading {GGUF_FILE}...")
76
+ api.upload_file(
77
+ path_or_fileobj=GGUF_FILE,
78
+ path_in_repo=GGUF_FILE,
79
+ repo_id=OUTPUT_REPO,
80
+ repo_type="model"
81
+ )
82
+
83
+ print("Done! GGUF available at:", f"https://huggingface.co/{OUTPUT_REPO}")