"""
🔧 LoRA Training Script
Generated by: MLResearcher (Hivemind Colony)
Adapter: hivemind-chat-4976c6fe
Base Model: Qwen/Qwen2.5-3B-Instruct
Task: chat
"""

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
from trl import SFTTrainer
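# --- Configuration ---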
BASE_MODEL = "Qwen/Qwen2.5-3B-Instruct"
ADAPTER_NAME = "hivemind-chat-4976c6fe"
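# LoRA adapter configuration: rank-16, alpha-32 adapters on the attention projections (q/k/v/o)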
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    bias="none",
    task_type="CAUSAL_LM"
)
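# Training hyperparameters: effective batch size 16 (4 per device x 4 accumulation steps)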
training_args = TrainingArguments(
    output_dir=f"./{ADAPTER_NAME}",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=0.0002,
    weight_decay=0.01,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    logging_steps=10,
    save_strategy="epoch",
    bf16=True,  # match the bfloat16 compute dtype of the 4-bit quantization below (needs an Ampere-or-newer GPU)
    optim="paged_adamw_8bit",
    report_to="none"
)
print(f"Loading {BASE_MODEL}...")
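# 4-bit NF4 quantization (QLoRA-style) keeps the base model's memory footprint low during fine-tuning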
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # pad with the EOS token
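# Prepare the quantized model for k-bit training, then attach the LoRA adapters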
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

# Report trainable vs. total parameter counts
model.print_trainable_parameters()
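# NOTE: "your-dataset-here" is a placeholder; format_prompt below expects `instruction` and `response` columns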
dataset = load_dataset("your-dataset-here", split="train")
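# Render each example as an Alpaca-style instruction/response prompt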
def format_prompt(example):
    return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['response']}"
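# Supervised fine-tuning with example packing at a 2048-token context.
# Note: these keyword arguments follow the older TRL SFTTrainer API; recent TRL releases
# expect max_seq_length, packing and formatting_func to be passed via SFTConfig instead.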
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=training_args,
    max_seq_length=2048,
    formatting_func=format_prompt,
    packing=True
)
print("Starting training...")
trainer.train()
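# Saving a PEFT-wrapped model writes only the LoRA adapter weights and config, not the full base model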
| print(f"Saving adapter to ./{ADAPTER_NAME}") |
| trainer.save_model(f"./{ADAPTER_NAME}") |
|
|
| |
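# Requires Hub authentication (e.g. huggingface-cli login or an HF_TOKEN environment variable)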
| print("Pushing to HuggingFace Hub...") |
| model.push_to_hub(f"Pista1981/{ADAPTER_NAME}") |
| tokenizer.push_to_hub(f"Pista1981/{ADAPTER_NAME}") |
print("✅ Training complete!")