Bopalv committed on
Commit
dff2191
·
verified ·
1 Parent(s): 74ba17f

Upload DPO-Training/quantize_dpo_model.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. DPO-Training/quantize_dpo_model.py +127 -0
DPO-Training/quantize_dpo_model.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Quantize DPO-trained Qwen3-0.6B model to GGUF format.
4
+
5
+ Usage:
6
+ python quantize_dpo_model.py --model_path ./qwen3-0.6b-dpo-merged
7
+ python quantize_dpo_model.py --model_path ./qwen3-0.6b-dpo-merged --quantization Q4_K_M
8
+ """
9
+
10
+ import argparse
11
+ import subprocess
12
+ import os
13
+ import sys
14
+
15
+
16
def _default_output_name(model_path, quantization):
    """Return ``<model-dir-name>-<quantization>.gguf`` for *model_path*."""
    # abspath strips trailing separators and resolves "." / "..", so inputs
    # such as "./model/" or "." still yield a real directory name.
    model_name = os.path.basename(os.path.abspath(model_path)) or "model"
    return f"{model_name}-{quantization}.gguf"


def _run_step(cmd, label):
    """Run *cmd* and exit with status 1 (after reporting) if it fails.

    Args:
        cmd: Argument list passed to ``subprocess.run`` (no shell involved).
        label: Human-readable step name used in the failure message.

    Raises:
        SystemExit: With code 1 when the program cannot be launched or
            returns a non-zero exit status.
    """
    print(f"Running: {' '.join(cmd)}")
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as exc:
        # Missing or non-executable binary: report it like any other
        # failure instead of crashing with a traceback.
        print(f"❌ {label} failed: {exc}")
        sys.exit(1)

    if result.returncode != 0:
        # Some tools write their diagnostics to stdout; fall back to it so
        # the failure report is never empty.
        print(f"❌ {label} failed: {result.stderr or result.stdout}")
        sys.exit(1)


def main(argv=None):
    """Convert a merged DPO model to GGUF (FP16) and quantize it.

    The work happens in two subprocess steps: the HF-to-GGUF conversion
    script writes an intermediate ``<output_name>.temp`` file, then the
    quantizer binary turns it into the final ``output_name``. The
    intermediate file is removed whether or not the steps succeed.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: Code 1 if the model path is missing or a step fails;
            code 2 on invalid command-line arguments (raised by argparse).
    """
    parser = argparse.ArgumentParser(description="Quantize DPO model to GGUF")
    parser.add_argument("--model_path", required=True, help="Path to merged DPO model")
    parser.add_argument(
        "--output_name",
        default=None,
        help="Output GGUF file name (auto-generated if not set)",
    )
    parser.add_argument(
        "--quantization",
        default="Q4_K_S",
        choices=[
            "Q2_K",
            "Q3_K_S",
            "Q3_K_M",
            "Q3_K_L",
            "Q4_K_S",
            "Q4_K_M",
            "Q5_K_S",
            "Q5_K_M",
            "Q6_K",
            "Q8_0",
        ],
        help="Quantization type",
    )
    parser.add_argument(
        "--convert_script",
        default="/home/ma/prima.cpp/convert_hf_to_gguf.py",
        help="Path to llama.cpp conversion script",
    )
    parser.add_argument(
        "--quantize_bin",
        default="/home/ma/prima.cpp/llama-quantize",
        help="Path to the llama-quantize binary",
    )

    args = parser.parse_args(argv)

    # Generate output name if not provided
    if args.output_name is None:
        args.output_name = _default_output_name(args.model_path, args.quantization)

    print("=" * 60)
    print("DPO Model Quantization")
    print("=" * 60)
    print(f"Model: {args.model_path}")
    print(f"Quantization: {args.quantization}")
    print(f"Output: {args.output_name}")
    print("=" * 60)

    # Check if model path exists
    if not os.path.exists(args.model_path):
        print(f"❌ Error: Model path not found: {args.model_path}")
        sys.exit(1)

    temp_gguf = f"{args.output_name}.temp"

    try:
        # Step 1: Convert to GGUF (FP16)
        print("\n📥 Converting to GGUF format...")
        _run_step(
            [
                sys.executable,
                args.convert_script,
                args.model_path,
                "--outfile",
                temp_gguf,
                "--outtype",
                "f16",
            ],
            "Conversion",
        )
        print("✅ Conversion complete")

        # Step 2: Quantize
        print(f"\n🔧 Quantizing to {args.quantization}...")
        _run_step(
            [
                args.quantize_bin,
                temp_gguf,
                args.output_name,
                args.quantization,
            ],
            "Quantization",
        )
        print("✅ Quantization complete")
    finally:
        # Runs on success and on sys.exit() alike, so a failed step never
        # leaves the (potentially large) FP16 intermediate behind.
        if os.path.exists(temp_gguf):
            os.remove(temp_gguf)
            print("🧹 Cleaned up temporary files")

    # Get file size
    if os.path.exists(args.output_name):
        size_mb = os.path.getsize(args.output_name) / (1024 * 1024)
        print(f"\n📦 Output file: {args.output_name} ({size_mb:.1f} MB)")

    print("\n" + "=" * 60)
    print("✅ Quantization Complete!")
    print("=" * 60)
    print(f"Quantized model: {args.output_name}")
    print("Ready for deployment!")


if __name__ == "__main__":
    main()