scrubdata / scripts /modal_export_gguf.py
OpenAI Codex
deploy: add sponsor:openai tag (Best Use of Codex) + Codex-hardened build
16dc556
Raw
History Blame Contribute Delete
2.68 kB
"""Export the blessed v6 adapter (Modal volume) -> merged bf16 -> GGUF Q8_0 -> HF Hub.

Q8_0 only: Q4_K_M corrupts this model (documented v4 failure). Runs on a CPU instance
(merge + convert are RAM-bound, so no GPU is needed). The HF token is passed as a
function argument at spawn time — transient, never baked into the image or logged.

Usage:
    uv run modal run scripts/modal_export_gguf.py --repo ricalanis/scrubdata-qwen3-4b-v6-q8
"""
import modal
# Python packages installed into the container image.
_pip_packages = (
    "torch",
    "transformers>=4.45",
    "peft",
    "accelerate",
    "sentencepiece",
    "huggingface_hub",
    "gguf",
    "numpy",
    "safetensors",
)

# Debian slim (Python 3.11) with git, so llama.cpp can be cloned at build time.
_base_image = modal.Image.debian_slim(python_version="3.11").apt_install("git")

# Add the Python deps, then a shallow llama.cpp checkout; export() runs
# /llama.cpp/convert_hf_to_gguf.py from it.
# NOTE(review): the clone is not pinned to a tag/commit, so an image rebuild may
# pick up a different converter revision — consider pinning.
image = _base_image.pip_install(*_pip_packages).run_commands(
    "git clone --depth 1 https://github.com/ggml-org/llama.cpp /llama.cpp"
)

app = modal.App("scrubdata-export-gguf", image=image)

# Named Modal volume; export() mounts it at /vol and reads the adapter from it.
adapter_vol = modal.Volume.from_name("scrubdata-v5-adapter")
@app.function(cpu=8, memory=49152, timeout=3600, volumes={"/vol": adapter_vol})
def export(repo: str, hf_token: str, adapter: str = "/vol/v5_seed21") -> str:
    """Merge the PEFT adapter into the base model, convert to GGUF Q8_0, upload to the Hub.

    Steps: load the base model in bf16, apply and merge the adapter, save the merged
    checkpoint plus tokenizer to /tmp/merged, run llama.cpp's convert_hf_to_gguf.py
    with ``--outtype q8_0``, then upload the resulting file to ``repo``.

    Args:
        repo: Hugging Face model repo id; created if it does not exist yet.
        hf_token: Hugging Face token. It is only handed to ``HfApi`` and is never
            printed by this function.
        adapter: Path of the PEFT adapter to merge (by default inside the mounted
            volume). NOTE(review): the default path and the volume name say "v5"
            while the uploaded artifact is labelled v6 — confirm this is the
            intended adapter.

    Returns:
        ``repo``, unchanged, after the upload has completed.

    Raises:
        subprocess.CalledProcessError: if the GGUF converter exits non-zero.
    """
    import gc
    import os
    import subprocess
    import sys

    # Third-party imports stay function-local; they are installed in the container
    # image and are not needed to define the Modal app itself.
    import torch
    from huggingface_hub import HfApi
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base_id = "unsloth/Qwen3-4B-Instruct-2507"
    merged_dir = "/tmp/merged"
    # Single source of truth for the artifact name (local file and path in the repo).
    gguf_name = "scrubdata-qwen3-4b-v6.Q8_0.gguf"
    out = os.path.join("/tmp", gguf_name)

    print("loading base + adapter (CPU, bf16)...", flush=True)
    base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
    model = PeftModel.from_pretrained(base, adapter).merge_and_unload()
    tok = AutoTokenizer.from_pretrained(base_id)
    model.save_pretrained(merged_dir, safe_serialization=True)
    tok.save_pretrained(merged_dir)
    print("merged model saved", flush=True)

    # The merged weights are on disk now; drop the in-memory copies before the
    # converter subprocess loads them again, to keep peak RAM down.
    del model, base, tok
    gc.collect()

    # Use the interpreter running this function rather than whatever "python"
    # resolves to on PATH, so the converter sees the same installed packages.
    subprocess.run(
        [
            sys.executable,
            "/llama.cpp/convert_hf_to_gguf.py",
            merged_dir,
            "--outfile",
            out,
            "--outtype",
            "q8_0",
        ],
        check=True,
    )
    print(f"GGUF ready: {os.path.getsize(out)/1e9:.2f} GB", flush=True)

    api = HfApi(token=hf_token)
    api.create_repo(repo, repo_type="model", exist_ok=True)
    api.upload_file(
        path_or_fileobj=out,
        path_in_repo=gguf_name,
        repo_id=repo,
        repo_type="model",
        commit_message="v6 (mixA): Q8_0 GGUF — hospital repair 0.475/0.185",
    )
    print(f"uploaded to https://huggingface.co/{repo}", flush=True)
    return repo
@app.local_entrypoint()
def main(repo: str = "ricalanis/scrubdata-qwen3-4b-v6-q8"):
    """Read the local Hugging Face token and spawn the remote ``export`` job.

    Args:
        repo: Hugging Face model repo id passed through to ``export``.

    Raises:
        RuntimeError: if no Hugging Face token is available locally.

    NOTE(review): this only spawns the job and returns without waiting for it.
    Whether the job keeps running after this entrypoint exits presumably depends on
    how the app is launched (e.g. ``modal run --detach``) — confirm against the
    Modal docs and the documented usage command.
    """
    from huggingface_hub import get_token

    token = get_token()
    # Explicit check instead of `assert`: asserts are stripped under `python -O`,
    # which would let an empty token reach the remote job.
    if not token:
        raise RuntimeError("no local HF token found")

    call = export.spawn(repo=repo, hf_token=token)
    print(f"Launched detached. call_id={call.object_id}")