RayMelius Claude Sonnet 4.6 committed on
Commit
97d584e
·
1 Parent(s): 58392d5

Fix GGUF export: clone llama.cpp for bundled gguf-py; update Modelfile

Browse files

- Clone llama.cpp shallow repo instead of downloading single script;
its bundled gguf-py is always in sync with convert_hf_to_gguf.py,
fixing MistralTokenizerType import error against PyPI gguf 0.17.1
- Add PYTHONPATH override so convert picks up llama.cpp's gguf-py
- Modelfile FROM now points to 7b-q4_k_m.gguf (4683 MB, Q4_K_M)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. Modelfile +2 -1
  2. scripts/export_gguf_windows.py +28 -21
Modelfile CHANGED
@@ -19,7 +19,8 @@
19
  # Works immediately — pulls qwen2.5:0.5b from Ollama.
20
  # No fine-tuning, but correct system prompt and parameters.
21
  #
22
- FROM qwen2.5:0.5b
 
23
 
24
  SYSTEM """You are the reasoning engine for Soci, an LLM-powered city population simulator. \
25
  You control AI agents (NPCs) living in a city. Each agent has a persona, needs \
 
19
  # Works immediately — pulls qwen2.5:0.5b from Ollama.
20
  # No fine-tuning, but correct system prompt and parameters.
21
  #
22
+ #FROM qwen2.5:0.5b
23
+ FROM ./data/training/7b/gguf/7b-q4_k_m.gguf
24
 
25
  SYSTEM """You are the reasoning engine for Soci, an LLM-powered city population simulator. \
26
  You control AI agents (NPCs) living in a city. Each agent has a persona, needs \
scripts/export_gguf_windows.py CHANGED
@@ -109,30 +109,37 @@ else:
109
  )
110
  print(f" Merged model saved.")
111
 
112
- # ── Step 2: Get convert_hf_to_gguf.py ────────────────────────────────────────
 
 
113
  print(f"\n=== Step 2: Prepare llama.cpp convert script ===")
114
 
115
- CONVERT_SCRIPT = CONVERT_CACHE / "convert_hf_to_gguf.py"
116
- CONVERT_REQS = CONVERT_CACHE / "requirements_convert.txt"
 
117
 
118
- if not CONVERT_SCRIPT.exists():
119
- BASE_URL = "https://raw.githubusercontent.com/ggml-org/llama.cpp/master"
120
- print(f" Downloading convert_hf_to_gguf.py ...")
121
- urllib.request.urlretrieve(f"{BASE_URL}/convert_hf_to_gguf.py", CONVERT_SCRIPT)
122
-
123
- # Also download the requirements file (needed for sentencepiece / tiktoken)
124
- try:
125
- urllib.request.urlretrieve(
126
- f"{BASE_URL}/requirements/requirements-convert_hf_to_gguf.txt",
127
- CONVERT_REQS,
128
- )
129
- print(f" Installing convert dependencies ...")
130
- subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
131
- "-r", str(CONVERT_REQS)])
132
- except Exception as e:
133
- print(f" [WARN] Could not fetch/install convert requirements: {e}")
134
  else:
135
- print(f" Using cached {CONVERT_SCRIPT}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  # ── Step 3: Convert merged model β†’ F16 GGUF ──────────────────────────────────
138
  print(f"\n=== Step 3: Convert to F16 GGUF ===")
@@ -149,7 +156,7 @@ else:
149
  "--outtype", "f16",
150
  ]
151
  print(f" Running: {' '.join(cmd)}")
152
- result = subprocess.run(cmd, capture_output=False)
153
  if result.returncode != 0:
154
  print(f"[ERROR] Conversion failed (exit {result.returncode})")
155
  sys.exit(1)
 
109
  )
110
  print(f" Merged model saved.")
111
 
112
+ # ── Step 2: Clone/update llama.cpp repo (shallow) ────────────────────────────
113
+ # We clone the full repo so the convert script uses its own bundled gguf-py,
114
+ # which is always in sync with the script (PyPI gguf lags behind llama.cpp master).
115
  print(f"\n=== Step 2: Prepare llama.cpp convert script ===")
116
 
117
+ LLAMA_REPO = CONVERT_CACHE / "llama.cpp"
118
+ CONVERT_SCRIPT = LLAMA_REPO / "convert_hf_to_gguf.py"
119
+ LLAMA_GGUF_PY = LLAMA_REPO / "gguf-py"
120
 
121
+ if LLAMA_REPO.exists() and CONVERT_SCRIPT.exists():
122
+ print(f" Repo cached at {LLAMA_REPO} — pulling latest ...")
123
+ subprocess.run(["git", "-C", str(LLAMA_REPO), "pull", "--ff-only", "-q"], check=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  else:
125
+ print(f" Cloning llama.cpp (shallow) into {LLAMA_REPO} ...")
126
+ subprocess.check_call([
127
+ "git", "clone", "--depth=1", "--filter=blob:none",
128
+ "https://github.com/ggml-org/llama.cpp.git",
129
+ str(LLAMA_REPO),
130
+ ])
131
+ print(f" Installing llama.cpp gguf-py + convert dependencies ...")
132
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
133
+ str(LLAMA_GGUF_PY)])
134
+ reqs = LLAMA_REPO / "requirements" / "requirements-convert_hf_to_gguf.txt"
135
+ if reqs.exists():
136
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-r", str(reqs)])
137
+
138
+ # Build PYTHONPATH so convert script picks up llama.cpp's gguf-py over PyPI's
139
+ _convert_env = os.environ.copy()
140
+ _convert_env["PYTHONPATH"] = str(LLAMA_GGUF_PY / "src") + os.pathsep + _convert_env.get("PYTHONPATH", "")
141
+
142
+ print(f" Convert script: {CONVERT_SCRIPT}")
143
 
144
  # ── Step 3: Convert merged model β†’ F16 GGUF ──────────────────────────────────
145
  print(f"\n=== Step 3: Convert to F16 GGUF ===")
 
156
  "--outtype", "f16",
157
  ]
158
  print(f" Running: {' '.join(cmd)}")
159
+ result = subprocess.run(cmd, capture_output=False, env=_convert_env)
160
  if result.returncode != 0:
161
  print(f"[ERROR] Conversion failed (exit {result.returncode})")
162
  sys.exit(1)