|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| """ProofKit — fine-tune a small model (LoRA SFT) on Hugging Face Jobs.
|
|
|
| This script runs ON HUGGING FACE JOBS, not locally. It loads the ProofKit SFT
|
| dataset from the Hub, trains an attention-only LoRA adapter, and pushes it back
|
| to the Hub. It works for any base model; the intended HF Jobs target is a small
|
| dense model like meta-llama/Llama-3.2-3B-Instruct — fast and cheap on a T4, and
|
| the model that feeds ProofKit's GGUF / llama.cpp backend (the Llama Champion +
|
| Off the Grid badges). gpt-oss-20b is trained on Modal instead, where its MoE
|
| experts can be adapted on a bigger GPU — see scripts/modal_train_gpt_oss.py and
|
| docs/FINETUNE_MODAL.md.
|
|
|
| ⚠️ The Jobs container is ephemeral — everything is deleted when the job ends.
|
| `push_to_hub=True` (+ the HF_TOKEN secret) is what makes the result survive.
|
|
|
| Submit it from your terminal (after uploading this file to a Hub repo):
|
|
|
| hf jobs uv run \\
|
| --flavor a100-large \\
|
| --timeout 3h \\
|
| --secrets HF_TOKEN \\
|
| "https://huggingface.co/visproj/proofkit-train-scripts/resolve/main/train_gpt_oss.py"
|
|
|
| Configuration is via environment variables (pass with `--env KEY=VALUE`):
|
|
|
| BASE_MODEL base model to tune (default: openai/gpt-oss-20b)
|
| DATASET_REPO Hub dataset to train on (default: visproj/proofkit-sft)
|
| MODEL_REPO Hub repo to push to (default: visproj/proofkit-gpt-oss-20b-lora)
|
| EPOCHS training epochs (default: 3)
|
| LR learning rate (default: 2e-4)
|
| MAX_LEN max sequence length (default: 1024)
|
|
|
| See docs/FINETUNE_HF_JOBS.md for the full runbook.
|
| """
|
| import os
|
|
|
| from datasets import load_dataset
|
| from peft import LoraConfig, TaskType
|
| from trl import SFTConfig, SFTTrainer
|
|
|
def read_env_number(name, default, cast):
    """Read a strictly positive numeric setting from the environment.

    Args:
        name: Environment variable to read.
        default: String used when the variable is unset or blank.
        cast: ``int`` or ``float``; converts the raw string.

    Returns:
        The converted number.

    Raises:
        ValueError: If the value cannot be converted, or is not greater
            than zero (NaN is rejected too). The message names the
            variable so a bad ``--env KEY=VALUE`` is easy to spot in logs.
    """
    # A blank value (e.g. `--env EPOCHS=`) is treated like "not set".
    raw = os.environ.get(name, "").strip() or default
    try:
        value = cast(raw)
    except ValueError as err:
        raise ValueError(
            f"{name}={raw!r} is not a valid {cast.__name__}"
        ) from err
    # `not value > 0` (rather than `value <= 0`) also catches NaN.
    if not value > 0:
        raise ValueError(f"{name} must be > 0, got {raw!r}")
    return value


# Run configuration. Every value can be overridden through an environment
# variable of the same name; the second argument is the fallback.
BASE_MODEL = os.environ.get("BASE_MODEL", "openai/gpt-oss-20b")
DATASET_REPO = os.environ.get("DATASET_REPO", "visproj/proofkit-sft")
MODEL_REPO = os.environ.get("MODEL_REPO", "visproj/proofkit-gpt-oss-20b-lora")
EPOCHS = read_env_number("EPOCHS", "3", float)
LR = read_env_number("LR", "2e-4", float)
MAX_LEN = read_env_number("MAX_LEN", "1024", int)
# Case-insensitive substring match on the model id.
is_gpt_oss = "gpt-oss" in BASE_MODEL.lower()

# Echo the resolved settings; flush so they show up in the log immediately.
print(f"Base model : {BASE_MODEL}", flush=True)
print(f"Dataset : {DATASET_REPO}", flush=True)
print(f"Push to : {MODEL_REPO}", flush=True)
|
|
|
# Fetch the "train" split of the configured dataset repo and report its size.
dataset = load_dataset(DATASET_REPO, split="train")
example_count = len(dataset)
print(f"Examples : {example_count}", flush=True)
|
|
|
# Keyword arguments for loading the base model; they are handed to the
# trainer through `SFTConfig(model_init_kwargs=...)`.
model_init_kwargs = {
    "attn_implementation": "eager",
    "torch_dtype": "auto",
    "use_cache": False,
}

if is_gpt_oss:
    # Best effort: ask for the MXFP4 weights to be dequantized at load time.
    # Only the import is guarded, so a transformers build that does not
    # export Mxfp4Config falls back gracefully, while a genuine error from
    # constructing the config is not silently reported as "unavailable".
    try:
        from transformers import Mxfp4Config
    except ImportError:
        print("MXFP4 dequantize: unavailable (training in native dtype)", flush=True)
    else:
        model_init_kwargs["quantization_config"] = Mxfp4Config(dequantize=True)
        print("MXFP4 dequantize: on", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# LoRA adapter hyper-parameters, collected in one mapping so they can be read
# (or logged) at a glance before being turned into a LoraConfig.
# NOTE(review): the module docstring describes the adapter as
# "attention-only", but "all-linear" is what is requested here — confirm
# which of the two is intended.
lora_settings = {
    "r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": TaskType.CAUSAL_LM,
    "target_modules": "all-linear",
}
lora = LoraConfig(**lora_settings)
|
|
|
# Trainer configuration, grouped by concern and merged into one SFTConfig.

# Optimisation schedule: one sequence per step, 8 accumulated steps per
# optimizer update.
schedule_options = {
    "num_train_epochs": EPOCHS,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "learning_rate": LR,
    "max_length": MAX_LEN,
}

# Precision / memory settings and how the base model is loaded.
memory_options = {
    "bf16": True,
    "gradient_checkpointing": True,
    "model_init_kwargs": model_init_kwargs,
}

# Logging, checkpointing and publishing. No intermediate checkpoints are
# written (`save_strategy="no"`).
reporting_options = {
    "output_dir": "proofkit-gpt-oss-20b",
    "logging_steps": 10,
    "save_strategy": "no",
    "push_to_hub": True,
    "hub_model_id": MODEL_REPO,
    "report_to": "trackio",
    "run_name": "gpt-oss-20b-lora-sft",
}

args = SFTConfig(**schedule_options, **memory_options, **reporting_options)
|
|
|
# Build the trainer from the model id (not a pre-loaded model), the training
# split, the LoRA config and the SFT arguments.
trainer = SFTTrainer(
    model=BASE_MODEL,
    args=args,
    train_dataset=dataset,
    peft_config=lora,
)

print("Training...", flush=True)
trainer.train()

# Explicit upload after training; the module docstring explains that the
# Jobs container is ephemeral, so nothing left on disk survives.
trainer.push_to_hub()
adapter_url = f"https://huggingface.co/{MODEL_REPO}"
print(f"Done. Adapter pushed to {adapter_url}", flush=True)
|
|
|