Upload convert_to_gguf.py with huggingface_hub
Browse files — convert_to_gguf.py: +7 additions, −2 deletions
convert_to_gguf.py
CHANGED
|
@@ -29,7 +29,7 @@ print(f" Quantization: {QUANTIZATION}")
|
|
| 29 |
print("\n📦 Loading base model and tokenizer...")
|
| 30 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 31 |
BASE_MODEL,
|
| 32 |
- (removed line — content not captured in this extraction; given the matching `+ dtype=torch.float16,` addition in the new version, this was presumably the old `torch_dtype=torch.float16,` argument — verify against the original diff)
|
| 33 |
device_map="auto",
|
| 34 |
trust_remote_code=True
|
| 35 |
)
|
|
@@ -53,7 +53,12 @@ print("\n📥 Installing llama.cpp for GGUF conversion...")
|
|
| 53 |
subprocess.run(["apt-get", "update", "-qq"], check=True)
|
| 54 |
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential"], check=True)
|
| 55 |
subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
|
| 56 |
- (removed line — content not captured in this extraction; presumably the old single-threaded `make` build command, replaced in the new version by the multi-core build below — verify against the original diff)
|
|
| 57 |
|
| 58 |
# Step 5: Convert to GGUF format
|
| 59 |
print("\n🔄 Converting to GGUF format...")
|
|
|
|
| 29 |
print("\n📦 Loading base model and tokenizer...")
|
| 30 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 31 |
BASE_MODEL,
|
| 32 |
+
dtype=torch.float16,
|
| 33 |
device_map="auto",
|
| 34 |
trust_remote_code=True
|
| 35 |
)
|
|
|
|
| 53 |
subprocess.run(["apt-get", "update", "-qq"], check=True)
|
| 54 |
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential"], check=True)
|
| 55 |
subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
|
| 56 |
+
|
| 57 |
+
# Get number of processors
|
| 58 |
+
nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
|
| 59 |
+
nproc = nproc_result.stdout.strip()
|
| 60 |
+
print(f"Building llama.cpp with {nproc} cores...")
|
| 61 |
+
subprocess.run(["make", "-C", "llama.cpp", "-j", nproc], check=True)
|
| 62 |
|
| 63 |
# Step 5: Convert to GGUF format
|
| 64 |
print("\n🔄 Converting to GGUF format...")
|