Spaces:

build-small-hackathon
/

scrubdata

Running

scrubdata / scripts /modal_export_gguf.py

OpenAI Codex

deploy: add sponsor:openai tag (Best Use of Codex) + Codex-hardened build

16dc556 12 days ago

2.68 kB

	"""Export the blessed v6 adapter (Modal volume) -> merged bf16 -> GGUF Q8_0 -> HF Hub.

	Q8_0 only: Q4_K_M corrupts this model (documented v4 failure). CPU instance (merge +
	convert are RAM-bound, no GPU needed). The HF token is passed as a function argument at
	spawn time — transient, never baked into the image or logged.

	uv run modal run scripts/modal_export_gguf.py --repo ricalanis/scrubdata-qwen3-4b-v6-q8
	"""

	import modal

	# Python packages installed into the image; export() imports torch,
	# transformers, peft and huggingface_hub from this set.
	_PIP_PACKAGES = (
	    "torch",
	    "transformers>=4.45",
	    "peft",
	    "accelerate",
	    "sentencepiece",
	    "huggingface_hub",
	    "gguf",
	    "numpy",
	    "safetensors",
	)

	# Shallow clone of llama.cpp; export() runs /llama.cpp/convert_hf_to_gguf.py.
	_LLAMA_CPP_CLONE_CMD = (
	    "git clone --depth 1 https://github.com/ggml-org/llama.cpp /llama.cpp"
	)

	# Build the image layer by layer: base OS + Python, git (required by the
	# clone command), the Python packages, then the llama.cpp checkout.
	_base_image = modal.Image.debian_slim(python_version="3.11")
	_git_image = _base_image.apt_install("git")
	_deps_image = _git_image.pip_install(*_PIP_PACKAGES)
	image = _deps_image.run_commands(_LLAMA_CPP_CLONE_CMD)

	# Every function registered on this app runs in the image built above.
	app = modal.App("scrubdata-export-gguf", image=image)

	# Named volume that export() mounts at /vol to read the adapter from.
	adapter_vol = modal.Volume.from_name("scrubdata-v5-adapter")


	@app.function(cpu=8, memory=49152, timeout=3600, volumes={"/vol": adapter_vol})
	def export(repo: str, hf_token: str, adapter: str = "/vol/v5_seed21") -> str:
	    """Merge the adapter into the base model, convert to GGUF Q8_0, upload to the Hub.

	    Steps: load the base model in bf16, apply and merge the PEFT adapter, save
	    the merged weights and tokenizer to /tmp/merged, run llama.cpp's
	    ``convert_hf_to_gguf.py`` with ``--outtype q8_0``, then upload the single
	    GGUF file to ``repo`` (created if it does not exist).

	    Args:
	        repo: Hugging Face model repo id to upload to.
	        hf_token: Hugging Face token used for repo creation and upload.
	        adapter: Adapter location handed to ``PeftModel.from_pretrained``.
	            NOTE(review): the default path (and the mounted volume) are named
	            v5 while the output file and commit message say v6 — confirm this
	            is the intended checkpoint.

	    Returns:
	        ``repo``, unchanged.

	    Raises:
	        FileNotFoundError: ``adapter`` is an absolute path that is not a directory.
	        subprocess.CalledProcessError: the GGUF conversion exited non-zero.
	    """
	    import gc
	    import os
	    import subprocess
	    import sys

	    import torch
	    from transformers import AutoModelForCausalLM, AutoTokenizer
	    from peft import PeftModel
	    from huggingface_hub import HfApi

	    # Fail fast on a missing local adapter before loading the base model.
	    # Only absolute paths are checked so non-path identifiers pass through.
	    if os.path.isabs(adapter) and not os.path.isdir(adapter):
	        raise FileNotFoundError(f"adapter directory not found: {adapter}")

	    base_id = "unsloth/Qwen3-4B-Instruct-2507"
	    merged_dir = "/tmp/merged"
	    out = "/tmp/scrubdata-qwen3-4b-v6.Q8_0.gguf"

	    print("loading base + adapter (CPU, bf16)...", flush=True)
	    base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
	    model = PeftModel.from_pretrained(base, adapter).merge_and_unload()
	    tok = AutoTokenizer.from_pretrained(base_id)
	    model.save_pretrained(merged_dir, safe_serialization=True)
	    tok.save_pretrained(merged_dir)
	    print("merged model saved", flush=True)

	    # The merged weights are on disk now; drop the in-memory copies so the
	    # converter subprocess does not compete with them for RAM.
	    del model, base, tok
	    gc.collect()

	    # Use the interpreter running this function so the converter sees the
	    # same installed packages, rather than whatever "python" is on PATH.
	    subprocess.run(
	        [
	            sys.executable,
	            "/llama.cpp/convert_hf_to_gguf.py",
	            merged_dir,
	            "--outfile",
	            out,
	            "--outtype",
	            "q8_0",
	        ],
	        check=True,
	    )
	    print(f"GGUF ready: {os.path.getsize(out)/1e9:.2f} GB", flush=True)

	    api = HfApi(token=hf_token)
	    api.create_repo(repo, repo_type="model", exist_ok=True)
	    api.upload_file(
	        path_or_fileobj=out,
	        # Derived from the local path so the two names cannot drift apart.
	        path_in_repo=os.path.basename(out),
	        repo_id=repo,
	        repo_type="model",
	        commit_message="v6 (mixA): Q8_0 GGUF — hospital repair 0.475/0.185",
	    )
	    print(f"uploaded to https://huggingface.co/{repo}", flush=True)
	    return repo


	@app.local_entrypoint()
	def main(repo: str = "ricalanis/scrubdata-qwen3-4b-v6-q8"):
	    """Read the local Hugging Face token and launch ``export`` without waiting.

	    Args:
	        repo: Hugging Face model repo id forwarded to ``export``.

	    Raises:
	        RuntimeError: no Hugging Face token is available locally.
	    """
	    from huggingface_hub import get_token

	    token = get_token()
	    # Explicit raise instead of ``assert``: assertions are stripped when
	    # Python runs with -O, which would forward an empty token to the job.
	    if not token:
	        raise RuntimeError("no local HF token found")

	    # spawn() returns a handle immediately; the result is not awaited here.
	    # NOTE(review): confirm the app is started so that it outlives this
	    # process (e.g. `modal run --detach`); otherwise the job may be stopped
	    # when the entrypoint returns.
	    call = export.spawn(repo=repo, hf_token=token)
	    print(f"Launched detached. call_id={call.object_id}")