Spaces:
Running
Running
"""Export the blessed v6 adapter (Modal volume) -> merged bf16 -> GGUF Q8_0 -> HF Hub.

Q8_0 only: Q4_K_M corrupts this model (documented v4 failure). Runs on a CPU instance
(merge + convert are RAM-bound, no GPU needed). The HF token is passed as a function
argument at spawn time — transient, never baked into the image or logged.

Usage:
    uv run modal run scripts/modal_export_gguf.py --repo ricalanis/scrubdata-qwen3-4b-v6-q8
"""
| import modal | |
# Python packages installed into the container image; the export step imports
# torch, transformers, peft and huggingface_hub from this set.
_PIP_PACKAGES = (
    "torch",
    "transformers>=4.45",
    "peft",
    "accelerate",
    "sentencepiece",
    "huggingface_hub",
    "gguf",
    "numpy",
    "safetensors",
)

# Shallow clone of llama.cpp into /llama.cpp at image build time; the export
# step runs /llama.cpp/convert_hf_to_gguf.py from this checkout.
# NOTE(review): the clone is not pinned to a tag or commit, so the converter
# version depends on when the image was built.
_LLAMA_CPP_CLONE = "git clone --depth 1 https://github.com/ggml-org/llama.cpp /llama.cpp"

# git is installed first because the clone command above needs it.
_base_image = modal.Image.debian_slim(python_version="3.11").apt_install("git")
image = _base_image.pip_install(*_PIP_PACKAGES).run_commands(_LLAMA_CPP_CLONE)

# Every function registered on this app uses the image built above by default.
app = modal.App("scrubdata-export-gguf", image=image)

# Named Modal volume; presumably holds the trained adapter (the export default
# path is under /vol) — confirm.
adapter_vol = modal.Volume.from_name("scrubdata-v5-adapter")
@app.function(
    # The default `adapter` path is under /vol, so the adapter volume is mounted there.
    volumes={"/vol": adapter_vol},
    # NOTE(review): MiB; an estimate for merging a 4B model in bf16 on CPU — confirm.
    memory=32768,
    # NOTE(review): seconds; an estimate covering download + merge + convert + upload — confirm.
    timeout=2 * 60 * 60,
)
def export(repo: str, hf_token: str, adapter: str = "/vol/v5_seed21"):
    """Merge the adapter into the base model, convert to Q8_0 GGUF, upload to the Hub.

    Steps, in order:
      1. Load the base model in bf16 and merge the PEFT adapter into it.
      2. Save the merged model and tokenizer to /tmp/merged as safetensors.
      3. Run llama.cpp's ``convert_hf_to_gguf.py`` with ``--outtype q8_0``.
      4. Create the Hub model repo if needed and upload the GGUF file.

    Args:
        repo: Hugging Face model repo id to upload to (created if missing).
        hf_token: Hugging Face token used for the Hub API calls; it is only
            handed to ``HfApi`` and never printed.
        adapter: Location of the PEFT adapter passed to
            ``PeftModel.from_pretrained``.

    Returns:
        The ``repo`` id that was uploaded to.

    Raises:
        FileNotFoundError: ``adapter`` is an absolute path that is not a directory.
        subprocess.CalledProcessError: the GGUF conversion script exited non-zero.
    """
    import gc
    import os
    import subprocess
    import sys

    import torch
    from huggingface_hub import HfApi
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base_id = "unsloth/Qwen3-4B-Instruct-2507"
    merged_dir = "/tmp/merged"
    # NOTE(review): artifacts are labelled v6 while the default adapter path is
    # v5_seed21 — confirm this is the intended adapter.
    gguf_name = "scrubdata-qwen3-4b-v6.Q8_0.gguf"
    out = os.path.join("/tmp", gguf_name)

    # Fail fast, before the base model is loaded, if a local adapter path is
    # missing (e.g. the volume is not mounted or the directory name is wrong).
    if os.path.isabs(adapter) and not os.path.isdir(adapter):
        raise FileNotFoundError(f"adapter directory not found: {adapter}")

    print("loading base + adapter (CPU, bf16)...", flush=True)
    base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
    model = PeftModel.from_pretrained(base, adapter).merge_and_unload()
    tok = AutoTokenizer.from_pretrained(base_id)
    model.save_pretrained(merged_dir, safe_serialization=True)
    tok.save_pretrained(merged_dir)
    print("merged model saved", flush=True)

    # The merged weights are on disk now; drop the in-memory copies so the
    # converter subprocess does not compete with them for RAM.
    del model, base, tok
    gc.collect()

    # Run the converter with the interpreter that has the installed packages.
    subprocess.run(
        [
            sys.executable,
            "/llama.cpp/convert_hf_to_gguf.py",
            merged_dir,
            "--outfile",
            out,
            "--outtype",
            "q8_0",
        ],
        check=True,
    )
    print(f"GGUF ready: {os.path.getsize(out)/1e9:.2f} GB", flush=True)

    api = HfApi(token=hf_token)
    api.create_repo(repo, repo_type="model", exist_ok=True)
    api.upload_file(
        path_or_fileobj=out,
        path_in_repo=gguf_name,
        repo_id=repo,
        repo_type="model",
        commit_message="v6 (mixA): Q8_0 GGUF — hospital repair 0.475/0.185",
    )
    print(f"uploaded to https://huggingface.co/{repo}", flush=True)
    return repo
@app.local_entrypoint()
def main(repo: str = "ricalanis/scrubdata-qwen3-4b-v6-q8"):
    """Local entrypoint: read the local HF token and launch ``export`` remotely.

    Args:
        repo: Hugging Face model repo id to upload the GGUF to.

    Raises:
        RuntimeError: no Hugging Face token is available locally.
    """
    from huggingface_hub import get_token

    token = get_token()
    # Explicit raise rather than `assert`, which is stripped under `python -O`.
    if not token:
        raise RuntimeError("no local HF token found")

    # spawn() does not wait for the result; the token travels as a call argument.
    # NOTE(review): presumably this needs `modal run --detach` for the job to
    # outlive this local process — confirm.
    call = export.spawn(repo=repo, hf_token=token)
    print(f"Launched detached. call_id={call.object_id}")