Fix GGUF export: clone llama.cpp for bundled gguf-py; update Modelfile
- Clone llama.cpp shallow repo instead of downloading single script;
its bundled gguf-py is always in sync with convert_hf_to_gguf.py,
fixing MistralTokenizerType import error against PyPI gguf 0.17.1
- Add PYTHONPATH override so convert picks up llama.cpp's gguf-py
- Modelfile FROM now points to 7b-q4_k_m.gguf (4683 MB, Q4_K_M)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- Modelfile +2 -1
- scripts/export_gguf_windows.py +28 -21
Modelfile
CHANGED
|
@@ -19,7 +19,8 @@
|
|
| 19 |
# Works immediately — pulls qwen2.5:0.5b from Ollama.
|
| 20 |
# No fine-tuning, but correct system prompt and parameters.
|
| 21 |
#
|
| 22 |
-
FROM qwen2.5:0.5b
|
|
|
|
| 23 |
|
| 24 |
SYSTEM """You are the reasoning engine for Soci, an LLM-powered city population simulator. \
|
| 25 |
You control AI agents (NPCs) living in a city. Each agent has a persona, needs \
|
|
|
|
| 19 |
# Works immediately — pulls qwen2.5:0.5b from Ollama.
|
| 20 |
# No fine-tuning, but correct system prompt and parameters.
|
| 21 |
#
|
| 22 |
+
#FROM qwen2.5:0.5b
|
| 23 |
+
FROM ./data/training/7b/gguf/7b-q4_k_m.gguf
|
| 24 |
|
| 25 |
SYSTEM """You are the reasoning engine for Soci, an LLM-powered city population simulator. \
|
| 26 |
You control AI agents (NPCs) living in a city. Each agent has a persona, needs \
|
scripts/export_gguf_windows.py
CHANGED
|
@@ -109,30 +109,37 @@ else:
|
|
| 109 |
)
|
| 110 |
print(f" Merged model saved.")
|
| 111 |
|
| 112 |
-
# ── Step 2:
|
|
|
|
|
|
|
| 113 |
print(f"\n=== Step 2: Prepare llama.cpp convert script ===")
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
|
|
|
| 117 |
|
| 118 |
-
if
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
urllib.request.urlretrieve(f"{BASE_URL}/convert_hf_to_gguf.py", CONVERT_SCRIPT)
|
| 122 |
-
|
| 123 |
-
# Also download the requirements file (needed for sentencepiece / tiktoken)
|
| 124 |
-
try:
|
| 125 |
-
urllib.request.urlretrieve(
|
| 126 |
-
f"{BASE_URL}/requirements/requirements-convert_hf_to_gguf.txt",
|
| 127 |
-
CONVERT_REQS,
|
| 128 |
-
)
|
| 129 |
-
print(f" Installing convert dependencies ...")
|
| 130 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
|
| 131 |
-
"-r", str(CONVERT_REQS)])
|
| 132 |
-
except Exception as e:
|
| 133 |
-
print(f" [WARN] Could not fetch/install convert requirements: {e}")
|
| 134 |
else:
|
| 135 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
# ── Step 3: Convert merged model → F16 GGUF ──────────────────────────────────
|
| 138 |
print(f"\n=== Step 3: Convert to F16 GGUF ===")
|
|
@@ -149,7 +156,7 @@ else:
|
|
| 149 |
"--outtype", "f16",
|
| 150 |
]
|
| 151 |
print(f" Running: {' '.join(cmd)}")
|
| 152 |
-
result = subprocess.run(cmd, capture_output=False)
|
| 153 |
if result.returncode != 0:
|
| 154 |
print(f"[ERROR] Conversion failed (exit {result.returncode})")
|
| 155 |
sys.exit(1)
|
|
|
|
| 109 |
)
|
| 110 |
print(f" Merged model saved.")
|
| 111 |
|
| 112 |
+
# ── Step 2: Clone/update llama.cpp repo (shallow) ────────────────────────────
|
| 113 |
+
# We clone the full repo so the convert script uses its own bundled gguf-py,
|
| 114 |
+
# which is always in sync with the script (PyPI gguf lags behind llama.cpp master).
|
| 115 |
print(f"\n=== Step 2: Prepare llama.cpp convert script ===")
|
| 116 |
|
| 117 |
+
LLAMA_REPO = CONVERT_CACHE / "llama.cpp"
|
| 118 |
+
CONVERT_SCRIPT = LLAMA_REPO / "convert_hf_to_gguf.py"
|
| 119 |
+
LLAMA_GGUF_PY = LLAMA_REPO / "gguf-py"
|
| 120 |
|
| 121 |
+
if LLAMA_REPO.exists() and CONVERT_SCRIPT.exists():
|
| 122 |
+
print(f"   Repo cached at {LLAMA_REPO} — pulling latest ...")
|
| 123 |
+
subprocess.run(["git", "-C", str(LLAMA_REPO), "pull", "--ff-only", "-q"], check=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
else:
|
| 125 |
+
print(f" Cloning llama.cpp (shallow) into {LLAMA_REPO} ...")
|
| 126 |
+
subprocess.check_call([
|
| 127 |
+
"git", "clone", "--depth=1", "--filter=blob:none",
|
| 128 |
+
"https://github.com/ggml-org/llama.cpp.git",
|
| 129 |
+
str(LLAMA_REPO),
|
| 130 |
+
])
|
| 131 |
+
print(f" Installing llama.cpp gguf-py + convert dependencies ...")
|
| 132 |
+
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
|
| 133 |
+
str(LLAMA_GGUF_PY)])
|
| 134 |
+
reqs = LLAMA_REPO / "requirements" / "requirements-convert_hf_to_gguf.txt"
|
| 135 |
+
if reqs.exists():
|
| 136 |
+
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-r", str(reqs)])
|
| 137 |
+
|
| 138 |
+
# Build PYTHONPATH so convert script picks up llama.cpp's gguf-py over PyPI's
|
| 139 |
+
_convert_env = os.environ.copy()
|
| 140 |
+
_convert_env["PYTHONPATH"] = str(LLAMA_GGUF_PY / "src") + os.pathsep + _convert_env.get("PYTHONPATH", "")
|
| 141 |
+
|
| 142 |
+
print(f" Convert script: {CONVERT_SCRIPT}")
|
| 143 |
|
| 144 |
# ── Step 3: Convert merged model → F16 GGUF ──────────────────────────────────
|
| 145 |
print(f"\n=== Step 3: Convert to F16 GGUF ===")
|
|
|
|
| 156 |
"--outtype", "f16",
|
| 157 |
]
|
| 158 |
print(f" Running: {' '.join(cmd)}")
|
| 159 |
+
result = subprocess.run(cmd, capture_output=False, env=_convert_env)
|
| 160 |
if result.returncode != 0:
|
| 161 |
print(f"[ERROR] Conversion failed (exit {result.returncode})")
|
| 162 |
sys.exit(1)
|