#!/usr/bin/env python3
"""
🔧 LoRA Training Script
Generated by: MLResearcher (Hivemind Colony)
Adapter: hivemind-instruct-587c9d19
Base Model: google/gemma-2-2b-it
Task: instruct
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
from trl import SFTTrainer
import bitsandbytes as bnb  # noqa: F401 -- fails fast if bitsandbytes (required for 4-bit quantization and paged_adamw_8bit) is missing
# ============ CONFIG ============
BASE_MODEL = "google/gemma-2-2b-it"
ADAPTER_NAME = "hivemind-instruct-587c9d19"
# LoRA Configuration
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    bias="none",
    task_type="CAUSAL_LM"
)
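# With r=32 and lora_alpha=64, adapter updates are scaled by lora_alpha / r = 2.0;
# lora_dropout applies only to the adapter branch, not the frozen base weights.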
# Training Configuration
training_args = TrainingArguments(
    output_dir=f"./{ADAPTER_NAME}",
    num_train_epochs=2,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    learning_rate=1e-4,
    weight_decay=0.01,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    logging_steps=10,
    save_strategy="epoch",
    bf16=True,  # match bnb_4bit_compute_dtype (bfloat16); mixing fp16 with a bf16 compute dtype is a common source of instability
    optim="paged_adamw_8bit",
    report_to="none"
)
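# Effective batch size per device = per_device_train_batch_size * gradient_accumulation_steps = 8 * 4 = 32.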
# ============ LOAD MODEL ============
print(f"Loading {BASE_MODEL}...")
# 4-bit quantization for QLoRA
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)
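# NF4 stores the frozen base weights in 4-bit (with the quantization scales themselves
# quantized again via double quantization); matrix multiplications still run in bfloat16.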
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # ensure a pad token is defined for padded/packed batches
# Prepare model for training
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
print(f"Trainable parameters: {model.print_trainable_parameters()}")
# ============ LOAD DATASET ============
# Replace with your dataset
dataset = load_dataset("your-dataset-here", split="train")
def format_prompt(example):
    return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['response']}"
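# The formatting function assumes each record has "instruction" and "response" fields,
# e.g. {"instruction": "Summarize this paragraph.", "response": "..."} (hypothetical example);
# rename the keys to match your dataset's actual schema.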
# ============ TRAIN ============
# Note: these keyword arguments (tokenizer, max_seq_length, packing, formatting_func)
# follow the classic SFTTrainer API; recent trl releases move max_seq_length and
# packing into SFTConfig, so pin an older trl or adapt the call accordingly.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=training_args,
    max_seq_length=1024,
    formatting_func=format_prompt,
    packing=True
)
print("Starting training...")
trainer.train()
# ============ SAVE ============
print(f"Saving adapter to ./{ADAPTER_NAME}")
trainer.save_model(f"./{ADAPTER_NAME}")
# Push to the Hugging Face Hub (requires authentication, e.g. `huggingface-cli login` or an HF_TOKEN env var)
print("Pushing to HuggingFace Hub...")
model.push_to_hub(f"Pista1981/{ADAPTER_NAME}")
tokenizer.push_to_hub(f"Pista1981/{ADAPTER_NAME}")
print("✅ Training complete!")