# ORA / scripts / train_ora.py
# Author: Abdalkaderdev — "Initial ORA deployment" (commit 5e0532d)
import os

# --- IMPORT PRIORITY: Unsloth must be imported before transformers/torch if used ---
# Unsloth patches transformers/torch internals at import time, so it must be the
# first of the three to load on a CUDA machine. On CPU (or if torch/unsloth is
# missing) we simply record that the fast path is unavailable.
try:
    import torch
    if torch.cuda.is_available():
        # Force unsloth import first on GPU
        from unsloth import FastLanguageModel
        HAS_UNSLOTH = True
        print("ORA Trainer: Unsloth imported successfully.")
    else:
        HAS_UNSLOTH = False
except ImportError:
    # NOTE(review): this also swallows a missing `torch`; later code calls
    # torch.cuda.is_available() unconditionally and would then NameError.
    HAS_UNSLOTH = False

# Now safe to import others
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,  # NOTE(review): unused here — SFTConfig is used instead; kept for compatibility
)
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig

# Settings
MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct"  # Base model
MAX_SEQ_LENGTH = 2048  # max sequence length, also passed to SFTConfig.max_length
OUTPUT_DIR = "important/finetuning/models/ora_adapter"  # where the LoRA adapter + tokenizer are saved
def train_ora(max_steps=5):
    """Fine-tune the ORA LoRA adapter on the consolidated JSONL dataset.

    Selects a backend from available hardware:
      * GPU + Unsloth (QLoRA, 4-bit) when CUDA and unsloth are available.
      * CPU + standard PEFT (float32 LoRA) otherwise.

    Args:
        max_steps: Number of optimizer steps to run (forwarded to
            ``SFTConfig.max_steps``).

    Side effects:
        Saves the trained adapter and tokenizer to ``OUTPUT_DIR``.
        Returns early (None) if the dataset file is missing.
    """
    # Fail fast: verify the dataset exists BEFORE spending minutes (and GBs of
    # RAM/VRAM) loading a model, only to bail out afterwards.
    data_path = "important/curated_data/final_ora_dataset.jsonl"
    if not os.path.exists(data_path):
        print(f"Error: Dataset {data_path} not found. Run consolidation first!")
        return

    has_cuda = torch.cuda.is_available()
    print(f"ORA Trainer: CUDA Detected = {has_cuda}")

    if has_cuda and HAS_UNSLOTH:
        # --- MODE: GPU (Unsloth/QLoRA) ---
        print("ORA Trainer: Using GPU + Unsloth (Standard for Google Colab)")
        model, tokenizer = FastLanguageModel.from_pretrained(
            # Pre-quantized 4-bit variant of MODEL_NAME (saves download + load time).
            model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
            max_seq_length = MAX_SEQ_LENGTH,
            load_in_4bit = True,
        )
        model = FastLanguageModel.get_peft_model(
            model,
            r = 16,
            target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"],
            lora_alpha = 32,
            lora_dropout = 0,  # 0 keeps unsloth's optimized (fast) kernel path
            bias = "none",
            use_gradient_checkpointing = "unsloth",  # unsloth's memory-efficient checkpointing
            random_state = 3407,
        )
    else:
        # --- MODE: CPU (Standard PEFT) ---
        print("ORA Trainer: Using CPU + Standard PEFT (Local Hardware Mode)")
        # Imported lazily: peft is only needed on this path.
        from peft import LoraConfig, get_peft_model, TaskType
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Llama tokenizers ship without a pad token; reuse EOS for padding.
        tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,  # full precision on CPU (no bf16/4-bit)
            device_map="cpu",
            low_cpu_mem_usage=True,
        )
        lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type=TaskType.CAUSAL_LM,
        )
        model = get_peft_model(model, lora_config)

    # Common dataset loading (existence already verified above).
    dataset = load_dataset("json", data_files=data_path, split="train")

    # SFTConfig (replaces TrainingArguments + extra SFT args)
    training_args = SFTConfig(
        output_dir=OUTPUT_DIR,
        per_device_train_batch_size=2 if has_cuda else 1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        max_steps=max_steps,
        logging_steps=1,
        save_strategy="no",  # we save the adapter manually below
        use_cpu=not has_cuda,
        report_to="none",
        max_length=MAX_SEQ_LENGTH,
        dataset_text_field="text",
        dataset_num_proc=2,  # Limit processes to avoid pickling errors
    )
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        processing_class=tokenizer,
        args=training_args,
    )

    print(f"ORA Trainer: Starting training ({max_steps} steps)...")
    trainer.train()

    print(f"ORA Trainer: Saving adapter to {OUTPUT_DIR}...")
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print("ORA Trainer: Training complete.")
# Script entry point: run a short fine-tune when invoked directly.
if __name__ == "__main__":
    train_ora(max_steps=100)  # Increased default steps for user utility, they can kill it if needed