# GRRNMAKER's picture
# Upload task.py with huggingface_hub
# daa7ac0 verified
# Raw
# History Blame Contribute Delete
# 2.81 kB
import os
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig
from huggingface_hub import login
print("Starting Pacific Quant Finance & Compliance Training on Hugging Face...")

# --- HUGGING FACE SETTINGS ---
# The access token is read from the HF_TOKEN environment variable.
# Fail fast when it is missing, blank, or still the "YOUR_HF_TOKEN" template
# value: passing a placeholder to login() only produces an opaque
# authentication error instead of telling the operator what to fix.
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
if not HF_TOKEN or HF_TOKEN == "YOUR_HF_TOKEN":
    raise SystemExit(
        "HF_TOKEN is not set. Export a valid Hugging Face access token in the "
        "HF_TOKEN environment variable before running this script."
    )
login(token=HF_TOKEN)
# --- Hub repositories ---
# Training data hosted on the Hugging Face Hub.
DATASET_ID = "GRRNMAKER/pacific-finance-dataset"
# Foundation model the adapters are trained on top of.
BASE_MODEL_PATH = "GRRNMAKER/magnus-qwen-foundation"
# Destination repository that receives the trained adapters.
HF_PUSH_REPO = "GRRNMAKER/Pacific-Quant-Finance-V1"

# --- Local paths ---
# Directory handed to the trainer as its output location.
OUTPUT_DIR = "./pacific_finance_checkpoints"
print(f"Loading tokenizer from {BASE_MODEL_PATH}...")
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_PATH,
    use_fast=False,
)

# Reuse the end-of-sequence token for padding when the tokenizer ships
# without a dedicated pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Single switch for 4-bit (bitsandbytes NF4) loading. It defaults to False,
# which matches what this script actually did before: the quantization config
# was built but never handed to from_pretrained, while the log line still
# claimed 4-bit loading. Set to True to load the base model quantized and
# prepare it for k-bit training.
LOAD_IN_4BIT = False

load_mode = "with 4-bit Quantization" if LOAD_IN_4BIT else "without quantization"
print(f"Loading base model {BASE_MODEL_PATH} {load_mode}...")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=LOAD_IN_4BIT,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Only pass the quantization config when it is actually enabled, so the
# default path loads the model exactly as before.
model_kwargs = {"device_map": "auto"}
if LOAD_IN_4BIT:
    model_kwargs["quantization_config"] = bnb_config

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_PATH, **model_kwargs)

if LOAD_IN_4BIT:
    # Needed only for quantized weights; skipped for the unquantized path.
    model = prepare_model_for_kbit_training(model)

# The generation KV cache is switched off for training.
model.config.use_cache = False
print("Applying LoRA Configuration...")

# Module names that receive LoRA adapters. Presumably the attention and
# feed-forward projections of the base model -- they must match its actual
# submodule names (verify against the checkpoint).
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
feed_forward_projections = ["gate_proj", "up_proj", "down_proj"]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,  # twice the rank
    lora_dropout=0.05,
    bias="none",
    target_modules=attention_projections + feed_forward_projections,
)

# Wrap the base model so that only the adapter weights are trained.
model = get_peft_model(model, lora_config)
print(f"Loading dataset {DATASET_ID}...")
dataset = load_dataset(DATASET_ID, split="train")

print("Configuring SFT Trainer...")
# Training hyper-parameters, kept in one named object so they are easy to
# inspect before the trainer is built.
training_args = SFTConfig(
    output_dir=OUTPUT_DIR,
    # Data: each example is read from the "text" column.
    dataset_text_field="text",
    max_seq_length=4096,
    # Batching: 2 sequences x 8 accumulation steps = 16 per optimizer step
    # on each device.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    # Optimisation schedule.
    num_train_epochs=2,
    learning_rate=1e-4,
    warmup_steps=100,
    optim="paged_adamw_8bit",
    fp16=True,
    # Logging and checkpointing cadence (in optimizer steps).
    logging_steps=10,
    save_steps=200,
    report_to="none",
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    processing_class=tokenizer,
)
print("Beginning SFTTrainer loop...")
trainer.train()

# Write the final adapters and tokenizer to disk before any network call, so
# a failed or interrupted upload does not cost the finished training run.
print(f"Saving final adapters locally to {OUTPUT_DIR}...")
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"Training complete. Pushing adapters directly to Hugging Face: {HF_PUSH_REPO}...")
trainer.model.push_to_hub(HF_PUSH_REPO)
tokenizer.push_to_hub(HF_PUSH_REPO)
print("Push complete! Model is now available on Hugging Face.")