Upload convert_to_gguf.py with huggingface_hub
Browse files — convert_to_gguf.py: +7 additions, −2 deletions
convert_to_gguf.py
CHANGED
|
@@ -29,7 +29,7 @@ print(f" Quantization: {QUANTIZATION}")
|
|
| 29 |
print("\n📦 Loading base model and tokenizer...")
|
| 30 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 31 |
BASE_MODEL,
|
| 32 |
- (removed line — content not captured in this extraction; given the matching `+ dtype=torch.float16,` addition in the new version, this was presumably the old `torch_dtype=torch.float16,` argument — verify against the original diff)
|
| 33 |
device_map="auto",
|
| 34 |
trust_remote_code=True
|
| 35 |
)
|
|
@@ -53,7 +53,12 @@ print("\n📥 Installing llama.cpp for GGUF conversion...")
|
|
| 53 |
subprocess.run(["apt-get", "update", "-qq"], check=True)
|
| 54 |
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential"], check=True)
|
| 55 |
subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
|
| 56 |
- (removed line — content not captured in this extraction; presumably the old single-threaded `make` build command, replaced in the new version by the multi-core build below — verify against the original diff)
|
|
| 57 |
|
| 58 |
# Step 5: Convert to GGUF format
|
| 59 |
print("\n🔄 Converting to GGUF format...")
|
|
|
|
| 29 |
print("\n📦 Loading base model and tokenizer...")
|
| 30 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 31 |
BASE_MODEL,
|
| 32 |
+
dtype=torch.float16,
|
| 33 |
device_map="auto",
|
| 34 |
trust_remote_code=True
|
| 35 |
)
|
|
|
|
| 53 |
subprocess.run(["apt-get", "update", "-qq"], check=True)
|
| 54 |
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential"], check=True)
|
| 55 |
subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
|
| 56 |
+
|
| 57 |
+
# Get number of processors
|
| 58 |
+
nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
|
| 59 |
+
nproc = nproc_result.stdout.strip()
|
| 60 |
+
print(f"Building llama.cpp with {nproc} cores...")
|
| 61 |
+
subprocess.run(["make", "-C", "llama.cpp", "-j", nproc], check=True)
|
| 62 |
|
| 63 |
# Step 5: Convert to GGUF format
|
| 64 |
print("\n🔄 Converting to GGUF format...")
|