#!/usr/bin/env python3
"""
🔧 LoRA Training Script
Generated by: MLResearcher (Hivemind Colony)
Adapter: hivemind-instruct-587c9d19
Base Model: google/gemma-2-2b-it
Task: instruct
"""

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
from trl import SFTTrainer
import bitsandbytes as bnb  # noqa: F401 -- needed at runtime by the paged_adamw_8bit optimizer

# ============ CONFIG ============
BASE_MODEL = "google/gemma-2-2b-it"
ADAPTER_NAME = "hivemind-instruct-587c9d19"

# LoRA Configuration: rank-32 adapters on all attention and MLP projections
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    bias="none",
    task_type="CAUSAL_LM",
)

# Training Configuration
# Note: bf16 (not fp16) to match bnb_4bit_compute_dtype below; Gemma-2 is
# known to be numerically unstable in fp16.
training_args = TrainingArguments(
    output_dir=f"./{ADAPTER_NAME}",
    num_train_epochs=2,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,  # effective batch size: 32
    learning_rate=1e-4,
    weight_decay=0.01,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    logging_steps=10,
    save_strategy="epoch",
    bf16=True,
    optim="paged_adamw_8bit",
    report_to="none",
)

# ============ LOAD MODEL ============
print(f"Loading {BASE_MODEL}...")

# 4-bit NF4 quantization for QLoRA
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Prepare the quantized model for training (casts norms to fp32, enables
# input-gradient hooks), then wrap it with the LoRA adapters
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # prints directly; it returns None

# ============ LOAD DATASET ============
# Replace with your dataset (expects 'instruction' and 'response' columns)
dataset = load_dataset("your-dataset-here", split="train")

def format_prompt(example):
    # Append EOS so the model learns where a response ends
    return (
        f"### Instruction:\n{example['instruction']}\n\n"
        f"### Response:\n{example['response']}{tokenizer.eos_token}"
    )

# ============ TRAIN ============
# Note: this uses the trl < 0.12 SFTTrainer API; newer releases move
# max_seq_length/packing into SFTConfig and rename tokenizer to processing_class.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=training_args,
    max_seq_length=1024,
    formatting_func=format_prompt,
    packing=True,
)

print("Starting training...")
trainer.train()

# ============ SAVE ============
print(f"Saving adapter to ./{ADAPTER_NAME}")
trainer.save_model(f"./{ADAPTER_NAME}")

# Push to HuggingFace (requires a prior `huggingface-cli login`)
print("Pushing to HuggingFace Hub...")
model.push_to_hub(f"Pista1981/{ADAPTER_NAME}")
tokenizer.push_to_hub(f"Pista1981/{ADAPTER_NAME}")

print("✅ Training complete!")
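
# ============ (OPTIONAL) SMOKE-TEST THE ADAPTER ============
# A minimal sketch of reloading the saved adapter for inference, assuming the
# run above completed and wrote the adapter to ./{ADAPTER_NAME}. Best run in a
# fresh process (or after freeing the training model) to avoid holding two
# copies of the base weights. The prompt is a hypothetical example.
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
)
adapted = PeftModel.from_pretrained(base, f"./{ADAPTER_NAME}")
adapted.eval()

prompt = "### Instruction:\nExplain LoRA in one sentence.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(adapted.device)
with torch.no_grad():
    out = adapted.generate(**inputs, max_new_tokens=128)
# Decode only the newly generated tokens, skipping the echoed prompt
print(tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))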
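
# ============ (OPTIONAL) MERGE FOR DEPLOYMENT ============
# A sketch of folding the adapter into the base weights so the result can be
# served without peft installed. Merging into a 4-bit quantized base is lossy,
# so this reloads the base in bf16 first. The "-merged" output path is an
# illustrative choice, not part of the generated script.
base_fp = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
merged = PeftModel.from_pretrained(base_fp, f"./{ADAPTER_NAME}")
merged = merged.merge_and_unload()  # bakes the LoRA deltas into the base weights
merged.save_pretrained(f"./{ADAPTER_NAME}-merged")
tokenizer.save_pretrained(f"./{ADAPTER_NAME}-merged")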