import os

# --- IMPORT PRIORITY: Unsloth must be imported before transformers if it is used ---
import torch

try:
    if torch.cuda.is_available():
        # Force the Unsloth import first on GPU so its patches apply before transformers loads
        from unsloth import FastLanguageModel
        HAS_UNSLOTH = True
        print("ORA Trainer: Unsloth imported successfully.")
    else:
        HAS_UNSLOTH = False
except ImportError:
    HAS_UNSLOTH = False

# Now safe to import others
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig

# Settings
MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct"  # Base model
MAX_SEQ_LENGTH = 2048
OUTPUT_DIR = "important/finetuning/models/ora_adapter"


def train_ora(max_steps=5):
    has_cuda = torch.cuda.is_available()
    print(f"ORA Trainer: CUDA Detected = {has_cuda}")

    if has_cuda and HAS_UNSLOTH:
        # --- MODE: GPU (Unsloth/QLoRA) ---
        print("ORA Trainer: Using GPU + Unsloth (Standard for Google Colab)")
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name="unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
            max_seq_length=MAX_SEQ_LENGTH,
            load_in_4bit=True,
        )
        # Attach LoRA adapters (rank 16) to the attention projections
        model = FastLanguageModel.get_peft_model(
            model,
            r=16,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
            lora_alpha=32,
            lora_dropout=0,
            bias="none",
            use_gradient_checkpointing="unsloth",
            random_state=3407,
        )
    else:
        # --- MODE: CPU (Standard PEFT) ---
        print("ORA Trainer: Using CPU + Standard PEFT (Local Hardware Mode)")
        from peft import LoraConfig, get_peft_model, TaskType

        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,
            device_map="cpu",
            low_cpu_mem_usage=True,
        )
        lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type=TaskType.CAUSAL_LM,
        )
        model = get_peft_model(model, lora_config)

    # Common Dataset Loading
    data_path = "important/curated_data/final_ora_dataset.jsonl"
    if not os.path.exists(data_path):
        print(f"Error: Dataset {data_path} not found. Run consolidation first!")
        return
    dataset = load_dataset("json", data_files=data_path, split="train")
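    # Each JSONL record is expected to carry the full training example in a "text"
    # field (that is what dataset_text_field="text" below points SFTTrainer at).
    # Illustrative shape only; the real content comes from the consolidation step:
    #   {"text": "<formatted prompt and response>"}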

    # SFTConfig (replaces TrainingArguments + extra SFT args)
    training_args = SFTConfig(
        output_dir=OUTPUT_DIR,
        per_device_train_batch_size=2 if has_cuda else 1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        max_steps=max_steps,
        logging_steps=1,
        save_strategy="no",
        use_cpu=not has_cuda,
        report_to="none",
        max_length=MAX_SEQ_LENGTH,
        dataset_text_field="text",
        dataset_num_proc=2,  # Limit processes to avoid pickling errors
    )
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        processing_class=tokenizer,
        args=training_args,
    )
| print(f"ORA Trainer: Starting training ({max_steps} steps)...") | |
| trainer.train() | |
| print(f"ORA Trainer: Saving adapter to {OUTPUT_DIR}...") | |
| model.save_pretrained(OUTPUT_DIR) | |
| tokenizer.save_pretrained(OUTPUT_DIR) | |
| print("ORA Trainer: Training complete.") | |


if __name__ == "__main__":
    # Default to 100 steps for practical runs; interrupt early if needed
    train_ora(max_steps=100)