""" |
|
|
🔧 LoRA Training Script |
|
|
Generated by: MLResearcher (Hivemind Colony) |
|
|
Adapter: hivemind-code-6440183e |
|
|
Base Model: microsoft/Phi-3-mini-4k-instruct |
|
|
Task: code |
|
|
""" |
|
|
|
|
|
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
from trl import SFTTrainer
import bitsandbytes as bnb  # noqa: F401  (not called directly; 4-bit loading and paged_adamw_8bit need it installed)

BASE_MODEL = "microsoft/Phi-3-mini-4k-instruct"
ADAPTER_NAME = "hivemind-code-6440183e"

# LoRA configuration: rank-8 adapters on the attention query/value projections.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

training_args = TrainingArguments(
    output_dir=f"./{ADAPTER_NAME}",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch size of 8 per device
    learning_rate=5e-5,
    weight_decay=0.01,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    logging_steps=10,
    save_strategy="epoch",
    bf16=True,  # keep mixed precision consistent with the bf16 compute dtype used for 4-bit loading below
    optim="paged_adamw_8bit",
    report_to="none",
)

print(f"Loading {BASE_MODEL}...") |
|
|
|
|
|
|
|
|
from transformers import BitsAndBytesConfig |
|
|
|
|
|
# Load the base model quantized to 4-bit NF4 (QLoRA-style) so it fits in limited GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # use EOS as the padding token

# Prepare the quantized model for k-bit training, then attach the LoRA adapters.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

print(f"Trainable parameters: {model.print_trainable_parameters()}") |
|
|
|
|
|
|
|
|
|
|
|
# NOTE: "your-dataset-here" is a placeholder; point this at the actual instruction/response dataset.
dataset = load_dataset("your-dataset-here", split="train")


def format_prompt(example):
    return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['response']}"

# Note: this follows the older trl SFTTrainer API; newer trl releases move
# max_seq_length and packing into SFTConfig and rename tokenizer= to processing_class=.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=training_args,
    max_seq_length=4096,
    formatting_func=format_prompt,
    packing=True,
)

print("Starting training...") |
|
|
trainer.train() |
|
|
|
|
|
|
|
|
print(f"Saving adapter to ./{ADAPTER_NAME}") |
|
|
trainer.save_model(f"./{ADAPTER_NAME}") |
|
|
|
|
|
|
|
|
print("Pushing to HuggingFace Hub...") |
|
|
model.push_to_hub(f"Pista1981/{ADAPTER_NAME}") |
|
|
tokenizer.push_to_hub(f"Pista1981/{ADAPTER_NAME}") |
|
|
|
|
|
print("✅ Training complete!") |