neuralbroker committed on
Commit
b56b94e
·
verified ·
1 Parent(s): 31c9421

Add scripts/export_production.py

Browse files
Files changed (1) hide show
  1. scripts/export_production.py +188 -0
scripts/export_production.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Merge LoRA adapter → HuggingFace model → GGUF (blitzkode.gguf).
3
+
4
+ Pipeline
5
+ --------
6
+ 1. Load base model + LoRA adapter, merge and unload adapters.
7
+ 2. Save merged HuggingFace model to <output_dir>/merged/.
8
+ 3. Download llama.cpp convert_hf_to_gguf.py from GitHub if not present.
9
+ 4. Run GGUF conversion (quantised Q4_K_M by default).
10
+ 5. Verify the output GGUF is loadable with llama-cpp-python.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import json
17
+ import subprocess
18
+ import sys
19
+ import urllib.request
20
+ from pathlib import Path
21
+
22
# Directory one level above the folder that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Default locations, all expressed relative to REPO_ROOT.
DEFAULT_CHECKPOINT = REPO_ROOT.joinpath("checkpoints", "blitzkode-1.5b-lora", "final")
DEFAULT_MERGED_DIR = REPO_ROOT.joinpath("exported", "merged")
DEFAULT_GGUF_OUT = REPO_ROOT.joinpath("blitzkode.gguf")

# Folder where the downloaded llama.cpp conversion script is cached.
LLAMA_CPP_SCRIPTS_DIR = REPO_ROOT.joinpath("llama.cpp")

# Raw GitHub URL of the HF -> GGUF conversion script.
CONVERT_SCRIPT_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/convert_hf_to_gguf.py"
30
+
31
+
32
def parse_args() -> argparse.Namespace:
    """Parse the export script's command-line options from ``sys.argv``.

    Returns:
        Namespace with ``checkpoint``, ``merged_dir``, ``gguf_out`` (paths),
        ``quant_type`` (str) and the booleans ``skip_merge``, ``skip_gguf``
        and ``verify``.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    # Path-valued options share the same shape; register them in one pass.
    path_options = (
        ("--checkpoint", DEFAULT_CHECKPOINT),
        ("--merged-dir", DEFAULT_MERGED_DIR),
        ("--gguf-out", DEFAULT_GGUF_OUT),
    )
    for flag, fallback in path_options:
        parser.add_argument(flag, type=Path, default=fallback)

    parser.add_argument(
        "--quant-type",
        default="q4_k_m",
        help="GGUF quantisation type (q4_k_m, q8_0, f16, …)",
    )
    parser.add_argument(
        "--skip-merge",
        action="store_true",
        help="Skip merge step; use --merged-dir as-is.",
    )
    parser.add_argument(
        "--skip-gguf",
        action="store_true",
        help="Skip GGUF conversion (only merge).",
    )

    # Verification is on by default; --no-verify writes False to the same dest.
    parser.add_argument(
        "--verify",
        action="store_true",
        default=True,
        help="Verify GGUF is loadable (default: on).",
    )
    parser.add_argument("--no-verify", dest="verify", action="store_false")

    return parser.parse_args()
43
+
44
+
45
+ # ─── Step 1: Merge ────────────────────────────────────────────────────────────
46
+
47
def merge_adapter(checkpoint: Path, merged_dir: Path) -> None:
    """Merge the LoRA adapter in *checkpoint* into its base model and save it.

    The base model name is read from ``adapter_config.json`` inside
    *checkpoint*. The base model is loaded on CPU in float16, the adapter is
    applied and merged, and the merged model plus the checkpoint's tokenizer
    are written to *merged_dir* (created if missing).

    Args:
        checkpoint: Directory holding the adapter weights, the tokenizer files
            and ``adapter_config.json``.
        merged_dir: Destination directory for the merged HuggingFace model.

    Raises:
        SystemExit: If ``adapter_config.json`` is missing, unreadable, not
            valid JSON, or does not name a base model.
    """
    print("\n[1/3] Merging LoRA adapter into base model …")

    # Validate the adapter config *before* the heavy ML imports so that a bad
    # --checkpoint path fails immediately instead of after loading torch.
    config_path = checkpoint / "adapter_config.json"
    if not config_path.exists():
        raise SystemExit(f"adapter_config.json not found: {config_path}")

    try:
        # JSON is UTF-8 by specification; do not depend on the locale encoding.
        with config_path.open(encoding="utf-8") as fh:
            adapter_config = json.load(fh)
    except (OSError, ValueError) as exc:  # ValueError covers JSON/Unicode decode errors
        raise SystemExit(f"Could not read {config_path}: {exc}") from exc

    base_name = (
        adapter_config.get("base_model_name_or_path")
        if isinstance(adapter_config, dict)
        else None
    )
    if not base_name:
        raise SystemExit(f"base_model_name_or_path missing from {config_path}")

    import torch  # noqa: PLC0415
    from peft import PeftModel  # noqa: PLC0415
    from transformers import AutoModelForCausalLM, AutoTokenizer  # noqa: PLC0415

    print(f" Checkpoint : {checkpoint}")
    print(f" Base model : {base_name}")

    dtype = torch.float16
    print(" Loading base model …")
    # NOTE(review): the `dtype=` keyword is what the original code passes;
    # older transformers releases may expect `torch_dtype=` — confirm against
    # the pinned transformers version.
    # NOTE(review): trust_remote_code=True runs code shipped with the model
    # repository; only use with base models / checkpoints you trust.
    base = AutoModelForCausalLM.from_pretrained(
        base_name,
        dtype=dtype,
        device_map="cpu",
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(str(checkpoint), trust_remote_code=True)

    print(" Loading & merging adapter …")
    model = PeftModel.from_pretrained(base, str(checkpoint))
    model = model.merge_and_unload()

    merged_dir.mkdir(parents=True, exist_ok=True)
    print(f" Saving merged model to {merged_dir} …")
    model.save_pretrained(str(merged_dir))
    tokenizer.save_pretrained(str(merged_dir))
    print(" Merge complete.")
82
+
83
+
84
+ # ─── Step 2: Download convert script ─────────────────────────────────────────
85
+
86
def ensure_convert_script() -> Path:
    """Return the path of a local ``convert_hf_to_gguf.py``, downloading it if needed.

    The script is cached in ``LLAMA_CPP_SCRIPTS_DIR``. A non-empty cached file
    is reused as-is; otherwise the script is fetched from
    ``CONVERT_SCRIPT_URL``. The download is written to a temporary ``.part``
    file and renamed into place, so an interrupted or empty download is never
    mistaken for a valid cached script on a later run.

    Returns:
        Path to the conversion script on disk.

    Raises:
        SystemExit: If the download or the write to disk fails.
    """
    LLAMA_CPP_SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
    convert_script = LLAMA_CPP_SCRIPTS_DIR / "convert_hf_to_gguf.py"

    # A zero-byte file is a leftover from a failed download, not a usable cache.
    if convert_script.is_file() and convert_script.stat().st_size > 0:
        print(f"\n Using cached: {convert_script}")
        return convert_script

    print("\n Downloading convert_hf_to_gguf.py from llama.cpp GitHub …")
    # NOTE(review): the URL tracks the moving `master` branch and the fetched
    # script is later executed; pinning to a tag or commit would make exports
    # reproducible and safer.
    partial = convert_script.with_name(convert_script.name + ".part")
    try:
        req = urllib.request.Request(CONVERT_SCRIPT_URL, headers={"User-Agent": "BlitzKode/2.0"})
        with urllib.request.urlopen(req, timeout=60) as response:
            content = response.read()
        if not content:
            raise ValueError("server returned an empty response body")
        partial.write_bytes(content)
        # Rename only after the full payload is on disk.
        partial.replace(convert_script)
    except Exception as exc:
        # Boundary handler: any network or filesystem failure aborts the export.
        try:
            partial.unlink()
        except OSError:
            pass  # nothing to clean up (file never created or already moved)
        raise SystemExit(f"Failed to download convert_hf_to_gguf.py: {exc}") from exc

    print(f" Saved to: {convert_script}")
    return convert_script
104
+
105
+
106
+ # ─── Step 3: GGUF conversion ──────────────────────────────────────────────────
107
+
108
def convert_to_gguf(merged_dir: Path, gguf_out: Path, quant_type: str) -> None:
    """Convert the merged HuggingFace model in *merged_dir* to a GGUF file.

    ``convert_hf_to_gguf.py`` only writes a small set of output types directly
    (f32, f16, bf16, q8_0, tq1_0, tq2_0, auto). For any other *quant_type*
    (for example the k-quants such as ``q4_k_m``) the model is first converted
    to an f16 intermediate next to *gguf_out* and then quantised with the
    llama.cpp ``llama-quantize`` tool, which must be available on ``PATH``.

    Args:
        merged_dir: Directory containing the merged HuggingFace model.
        gguf_out: Destination path of the final GGUF file.
        quant_type: Requested GGUF type (case-insensitive).

    Raises:
        SystemExit: If ``llama-quantize`` is required but not found, or if the
            conversion or quantisation subprocess exits with a non-zero status.
    """
    import shutil  # noqa: PLC0415  (only needed by this step)

    print(f"\n[2/3] Converting to GGUF ({quant_type}) …")

    # Output types the conversion script can emit without a second pass.
    direct_outtypes = frozenset({"f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"})
    requested = quant_type.lower()
    needs_quantize = requested not in direct_outtypes

    quantize_bin = None
    if needs_quantize:
        # Check for the tool up front so we fail before the slow conversion.
        quantize_bin = shutil.which("llama-quantize")
        if quantize_bin is None:
            raise SystemExit(
                f"Quantisation type {quant_type!r} cannot be produced by "
                "convert_hf_to_gguf.py directly and requires the llama.cpp "
                "'llama-quantize' tool, which was not found on PATH. "
                "Install llama.cpp or use --quant-type q8_0 / f16."
            )

    convert_script = ensure_convert_script()
    gguf_out.parent.mkdir(parents=True, exist_ok=True)

    if needs_quantize:
        convert_target = gguf_out.with_name(f"{gguf_out.stem}.f16.gguf")
        convert_outtype = "f16"
    else:
        convert_target = gguf_out
        convert_outtype = requested

    cmd = [
        sys.executable,
        str(convert_script),
        str(merged_dir),
        "--outfile",
        str(convert_target),
        "--outtype",
        convert_outtype,
    ]
    print(f" Running: {' '.join(cmd)}")
    # Output is not captured so the converter's progress streams to the console.
    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        raise SystemExit(f"GGUF conversion failed (exit code {result.returncode}).")

    if needs_quantize:
        quant_cmd = [quantize_bin, str(convert_target), str(gguf_out), requested.upper()]
        print(f" Running: {' '.join(quant_cmd)}")
        result = subprocess.run(quant_cmd, check=False)
        if result.returncode != 0:
            # Keep the intermediate so the quantisation can be retried by hand.
            raise SystemExit(
                f"GGUF quantisation failed (exit code {result.returncode}); "
                f"intermediate file kept at {convert_target}."
            )
        try:
            convert_target.unlink()
        except OSError:
            pass  # best effort: a leftover intermediate is harmless

    print(f" GGUF written: {gguf_out} ({gguf_out.stat().st_size / 1024**3:.2f} GB)")
126
+
127
+
128
+ # ─── Step 4: Verify ───────────────────────────────────────────────────────────
129
+
130
def verify_gguf(gguf_path: Path) -> None:
    """Smoke-test the GGUF at *gguf_path* by loading it and generating a few tokens.

    This is a best-effort check: any failure (including llama-cpp-python not
    being importable) is reported as a warning and never raised.

    Args:
        gguf_path: Path of the GGUF file to load.
    """
    print("\n[3/3] Verifying GGUF with llama-cpp-python …")

    try:
        import llama_cpp as backend  # noqa: PLC0415

        # Small CPU-only configuration: enough for a short sample completion.
        load_options = {
            "model_path": str(gguf_path),
            "n_ctx": 128,
            "n_threads": 2,
            "n_gpu_layers": 0,
            "verbose": False,
        }
        model = backend.Llama(**load_options)

        chat_prompt = "<|im_start|>user\nSay hello.<|im_end|>\n<|im_start|>assistant\n"
        completion = model(chat_prompt, max_tokens=8, stop=["<|im_end|>"])
        first_choice = completion["choices"][0]
        sample = first_choice["text"].strip()

        print(f" Sample output: {sample!r}")
        print(" Verification PASSED.")
    except Exception as err:
        # Deliberately broad: verification must not abort a finished export.
        print(f" [WARN] Verification raised: {err}")
        print(" GGUF was written; manual verification recommended.")
150
+
151
+
152
+ # ─── Main ─────────────────────────────────────────────────────────────────────
153
+
154
def main() -> None:
    """Run the export pipeline: merge, GGUF conversion, optional verification.

    Each stage can be skipped through the command-line flags returned by
    ``parse_args``. Exits via ``SystemExit`` if the merged model directory
    does not exist once the merge stage is done (or skipped).
    """
    args = parse_args()
    rule = "=" * 72

    print(rule)
    print("BLITZKODE PRODUCTION EXPORT")
    print(rule)

    # Stage 1: merge the adapter unless the caller opted out.
    if args.skip_merge:
        print("\n[1/3] Merge skipped (--skip-merge).")
    else:
        merge_adapter(args.checkpoint, args.merged_dir)

    # Later stages and the summary both rely on the merged directory.
    if not args.merged_dir.exists():
        raise SystemExit(f"Merged dir not found: {args.merged_dir}")

    # Stage 2: GGUF conversion.
    if args.skip_gguf:
        print("\n[2/3] GGUF conversion skipped (--skip-gguf).")
    else:
        convert_to_gguf(args.merged_dir, args.gguf_out, args.quant_type)

    # Stage 3: verify only when requested and a GGUF file is actually present.
    if args.verify and args.gguf_out.exists():
        verify_gguf(args.gguf_out)

    print(f"\n{rule}")
    print("EXPORT COMPLETE")
    print(f" Merged HF model : {args.merged_dir}")
    if args.gguf_out.exists():
        print(f" GGUF model : {args.gguf_out}")
    print("\nNext steps:")
    for hint in (" python scripts/push_to_hub.py", " python server.py"):
        print(hint)


if __name__ == "__main__":
    main()