Fix GGUF export: clone llama.cpp for bundled gguf-py; update Modelfile
- Clone llama.cpp shallow repo instead of downloading single script;
its bundled gguf-py is always in sync with convert_hf_to_gguf.py,
fixing MistralTokenizerType import error against PyPI gguf 0.17.1
- Add PYTHONPATH override so convert picks up llama.cpp's gguf-py
- Modelfile FROM now points to 7b-q4_k_m.gguf (4683 MB, Q4_K_M)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- Modelfile +2 -1
- scripts/export_gguf_windows.py +28 -21
Modelfile
CHANGED
|
@@ -19,7 +19,8 @@
|
|
| 19 |
# Works immediately — pulls qwen2.5:0.5b from Ollama.
|
| 20 |
# No fine-tuning, but correct system prompt and parameters.
|
| 21 |
#
|
| 22 |
-
FROM qwen2.5:0.5b
|
|
|
|
| 23 |
|
| 24 |
SYSTEM """You are the reasoning engine for Soci, an LLM-powered city population simulator. \
|
| 25 |
You control AI agents (NPCs) living in a city. Each agent has a persona, needs \
|
|
|
|
| 19 |
# Works immediately — pulls qwen2.5:0.5b from Ollama.
|
| 20 |
# No fine-tuning, but correct system prompt and parameters.
|
| 21 |
#
|
| 22 |
+
#FROM qwen2.5:0.5b
|
| 23 |
+
FROM ./data/training/7b/gguf/7b-q4_k_m.gguf
|
| 24 |
|
| 25 |
SYSTEM """You are the reasoning engine for Soci, an LLM-powered city population simulator. \
|
| 26 |
You control AI agents (NPCs) living in a city. Each agent has a persona, needs \
|
scripts/export_gguf_windows.py
CHANGED
|
@@ -109,30 +109,37 @@ else:
|
|
| 109 |
)
|
| 110 |
print(f" Merged model saved.")
|
| 111 |
|
| 112 |
-
# ── Step 2:
|
|
|
|
|
|
|
| 113 |
print(f"\n=== Step 2: Prepare llama.cpp convert script ===")
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
|
|
|
| 117 |
|
| 118 |
-
if
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
urllib.request.urlretrieve(f"{BASE_URL}/convert_hf_to_gguf.py", CONVERT_SCRIPT)
|
| 122 |
-
|
| 123 |
-
# Also download the requirements file (needed for sentencepiece / tiktoken)
|
| 124 |
-
try:
|
| 125 |
-
urllib.request.urlretrieve(
|
| 126 |
-
f"{BASE_URL}/requirements/requirements-convert_hf_to_gguf.txt",
|
| 127 |
-
CONVERT_REQS,
|
| 128 |
-
)
|
| 129 |
-
print(f" Installing convert dependencies ...")
|
| 130 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
|
| 131 |
-
"-r", str(CONVERT_REQS)])
|
| 132 |
-
except Exception as e:
|
| 133 |
-
print(f" [WARN] Could not fetch/install convert requirements: {e}")
|
| 134 |
else:
|
| 135 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
# ── Step 3: Convert merged model → F16 GGUF ──────────────────────────────────
|
| 138 |
print(f"\n=== Step 3: Convert to F16 GGUF ===")
|
|
@@ -149,7 +156,7 @@ else:
|
|
| 149 |
"--outtype", "f16",
|
| 150 |
]
|
| 151 |
print(f" Running: {' '.join(cmd)}")
|
| 152 |
-
result = subprocess.run(cmd, capture_output=False)
|
| 153 |
if result.returncode != 0:
|
| 154 |
print(f"[ERROR] Conversion failed (exit {result.returncode})")
|
| 155 |
sys.exit(1)
|
|
|
|
| 109 |
)
|
| 110 |
print(f" Merged model saved.")
|
| 111 |
|
| 112 |
+
# ── Step 2: Clone/update llama.cpp repo (shallow) ────────────────────────────
|
| 113 |
+
# We clone the full repo so the convert script uses its own bundled gguf-py,
|
| 114 |
+
# which is always in sync with the script (PyPI gguf lags behind llama.cpp master).
|
| 115 |
print(f"\n=== Step 2: Prepare llama.cpp convert script ===")
|
| 116 |
|
| 117 |
+
LLAMA_REPO = CONVERT_CACHE / "llama.cpp"
|
| 118 |
+
CONVERT_SCRIPT = LLAMA_REPO / "convert_hf_to_gguf.py"
|
| 119 |
+
LLAMA_GGUF_PY = LLAMA_REPO / "gguf-py"
|
| 120 |
|
| 121 |
+
if LLAMA_REPO.exists() and CONVERT_SCRIPT.exists():
|
| 122 |
+
print(f"   Repo cached at {LLAMA_REPO} — pulling latest ...")
|
| 123 |
+
subprocess.run(["git", "-C", str(LLAMA_REPO), "pull", "--ff-only", "-q"], check=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
else:
|
| 125 |
+
print(f" Cloning llama.cpp (shallow) into {LLAMA_REPO} ...")
|
| 126 |
+
subprocess.check_call([
|
| 127 |
+
"git", "clone", "--depth=1", "--filter=blob:none",
|
| 128 |
+
"https://github.com/ggml-org/llama.cpp.git",
|
| 129 |
+
str(LLAMA_REPO),
|
| 130 |
+
])
|
| 131 |
+
print(f" Installing llama.cpp gguf-py + convert dependencies ...")
|
| 132 |
+
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
|
| 133 |
+
str(LLAMA_GGUF_PY)])
|
| 134 |
+
reqs = LLAMA_REPO / "requirements" / "requirements-convert_hf_to_gguf.txt"
|
| 135 |
+
if reqs.exists():
|
| 136 |
+
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-r", str(reqs)])
|
| 137 |
+
|
| 138 |
+
# Build PYTHONPATH so convert script picks up llama.cpp's gguf-py over PyPI's
|
| 139 |
+
_convert_env = os.environ.copy()
|
| 140 |
+
_convert_env["PYTHONPATH"] = str(LLAMA_GGUF_PY / "src") + os.pathsep + _convert_env.get("PYTHONPATH", "")
|
| 141 |
+
|
| 142 |
+
print(f" Convert script: {CONVERT_SCRIPT}")
|
| 143 |
|
| 144 |
# ── Step 3: Convert merged model → F16 GGUF ──────────────────────────────────
|
| 145 |
print(f"\n=== Step 3: Convert to F16 GGUF ===")
|
|
|
|
| 156 |
"--outtype", "f16",
|
| 157 |
]
|
| 158 |
print(f" Running: {' '.join(cmd)}")
|
| 159 |
+
result = subprocess.run(cmd, capture_output=False, env=_convert_env)
|
| 160 |
if result.returncode != 0:
|
| 161 |
print(f"[ERROR] Conversion failed (exit {result.returncode})")
|
| 162 |
sys.exit(1)
|