Spaces:
Runtime error
Runtime error
import dataclasses
import os

import torch
from datasets import load_dataset
from huggingface_hub import login
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import SFTTrainer, SFTConfig
print("Starting Pacific Quant Finance & Compliance Training on Hugging Face...")

# --- HUGGING FACE SETTINGS ---
# The access token must be supplied through the HF_TOKEN environment variable
# (on a Space, configure it as a secret). There is deliberately no fallback:
# logging in with a placeholder string only produces an opaque authentication
# failure, so a missing token is reported up front instead.
_PLACEHOLDER_TOKEN = "YOUR_HF_TOKEN"
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
if not HF_TOKEN or HF_TOKEN == _PLACEHOLDER_TOKEN:
    raise RuntimeError(
        "HF_TOKEN is not set. Export a Hugging Face access token with write "
        "permission (the script pushes to the Hub at the end) before running."
    )
login(token=HF_TOKEN)

# Base foundational model the adapters are trained on top of.
BASE_MODEL_PATH = "GRRNMAKER/magnus-qwen-foundation"

# Training dataset hosted on the Hugging Face Hub.
DATASET_ID = "GRRNMAKER/pacific-finance-dataset"

# Hub repository that receives the trained adapters and the tokenizer.
HF_PUSH_REPO = "GRRNMAKER/Pacific-Quant-Finance-V1"

# Local directory for training checkpoints.
OUTPUT_DIR = "./pacific_finance_checkpoints"
# Load the tokenizer that ships with the base model, using the Python ("slow")
# implementation rather than the fast one.
print(f"Loading tokenizer from {BASE_MODEL_PATH}...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, use_fast=False)

if tokenizer.pad_token is None:
    # No dedicated padding token is defined: reuse EOS so batches can be padded.
    tokenizer.pad_token = tokenizer.eos_token
# 4-bit NF4 loading is OFF by default, which is what this script effectively
# did before: the quantization config was created with load_in_4bit=False and
# was never handed to from_pretrained(), even though the log line claimed
# 4-bit loading. Set this to True to really quantize the base model
# (requires the bitsandbytes package).
USE_4BIT = False

bnb_config = BitsAndBytesConfig(
    load_in_4bit=USE_4BIT,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Only pass the quantization config when it is actually enabled, and keep the
# log message truthful about how the weights are being loaded.
model_kwargs = {"device_map": "auto"}
if USE_4BIT:
    print(f"Loading base model {BASE_MODEL_PATH} with 4-bit Quantization...")
    model_kwargs["quantization_config"] = bnb_config
else:
    print(f"Loading base model {BASE_MODEL_PATH} without quantization...")

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_PATH, **model_kwargs)

if USE_4BIT:
    # Standard PEFT preparation for training on top of k-bit weights.
    model = prepare_model_for_kbit_training(model)
else:
    # The trainer config turns on gradient checkpointing while LoRA leaves the
    # base weights frozen; this hook makes the embedding output require grad
    # so gradients can flow back through checkpointed layers.
    # NOTE(review): may be redundant on TRL/PEFT versions that install the
    # hook themselves; calling it again is harmless.
    model.enable_input_require_grads()

# The KV cache is only useful for generation; disable it for training.
model.config.use_cache = False
print("Applying LoRA Configuration...")

# Linear layers that receive LoRA adapters, grouped for readability.
# (Presumably the attention and MLP projections of the base architecture;
# the names must match the module names inside the loaded model.)
_attention_targets = ["q_proj", "k_proj", "v_proj", "o_proj"]
_mlp_targets = ["gate_proj", "up_proj", "down_proj"]

_lora_settings = {
    "r": 32,
    "lora_alpha": 64,
    "target_modules": _attention_targets + _mlp_targets,
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
lora_config = LoraConfig(**_lora_settings)

# Wrap the base model so that only the adapter weights are trainable.
model = get_peft_model(model, lora_config)
print(f"Loading dataset {DATASET_ID}...")
dataset = load_dataset(DATASET_ID, split="train")

print("Configuring SFT Trainer...")

# TRL renamed SFTConfig's sequence-length cap from `max_seq_length` to
# `max_length`, and newer releases reject the old keyword outright. Use
# whichever field the installed version declares so the script runs on both.
_sft_field_names = {field.name for field in dataclasses.fields(SFTConfig)}
_length_kwarg = "max_length" if "max_length" in _sft_field_names else "max_seq_length"

sft_config = SFTConfig(
    dataset_text_field="text",
    output_dir=OUTPUT_DIR,
    # 2 sequences per device x 8 accumulation steps = 16 sequences per
    # optimizer step on each device.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    warmup_steps=100,
    num_train_epochs=2,
    learning_rate=1e-4,
    fp16=True,
    logging_steps=10,
    save_steps=200,
    optim="paged_adamw_8bit",
    report_to="none",
    gradient_checkpointing=True,
    **{_length_kwarg: 4096},
)

trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=dataset,
    args=sft_config,
)
print("Beginning SFTTrainer loop...")
trainer.train()

# Write the final adapters to disk before touching the network. Periodic
# checkpoints only land every `save_steps`, so without this a failed upload
# would lose whatever was trained after the last checkpoint.
trainer.save_model(OUTPUT_DIR)

print(f"Training complete. Pushing adapters directly to Hugging Face: {HF_PUSH_REPO}...")
trainer.model.push_to_hub(HF_PUSH_REPO)
tokenizer.push_to_hub(HF_PUSH_REPO)
print("Push complete! Model is now available on Hugging Face.")