import os
import random

# The patch call stays directly after its import so it runs before the later
# `from trl import DPOTrainer`.
from unsloth import PatchDPOTrainer
PatchDPOTrainer()
from huggingface_hub import HfApi
from huggingface_hub import create_repo
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset

# --- Run configuration ------------------------------------------------------
max_seq_length = 4096      # forwarded to FastLanguageModel.from_pretrained
dtype = None               # forwarded to FastLanguageModel.from_pretrained
load_in_4bit = True        # forwarded to FastLanguageModel.from_pretrained
repo_name = "dpo-v1-Nemo"  # W&B run name and last component of the Hub repo id

import wandb
import random

# Open the Weights & Biases run; the script closes it with wandb.finish()
# after the upload step.
wandb.init(project="huggingface", name=repo_name)

# Hub access token: read from the HF_TOKEN environment variable rather than a
# hard-coded (empty) string literal. If the variable is unset or empty, None is
# passed, i.e. this script supplies no explicit token.
hf_token = os.environ.get("HF_TOKEN") or None

# Load the starting checkpoint and its tokenizer through Unsloth.
# NOTE(review): the namespace here is `ijic062`, while the upload target later
# in the script uses `jic062` -- confirm which one is intended.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="ijic062/Nemo-v1.1",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token=hf_token,
)

# LoRA adapter settings, gathered in one mapping before being applied.
lora_settings = dict(
    r=64,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# Rebind `model` to the PEFT-wrapped version returned by Unsloth.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

# Load the preference dataset from the Hub.
dataset = load_dataset("Chaser-cz/dpo-nice-prompt")

# The shuffle seed is drawn at random on every run. It is printed so that the
# data order of a given run can be reproduced afterwards.
shuffle_seed = random.randint(1, 9999)
print(f"Shuffle seed: {shuffle_seed}")
train_dataset = dataset["train"].shuffle(seed=shuffle_seed)

column_names = list(dataset["train"].features)
print(f"This is column names: {column_names}")

# Sanity check: print one shuffled example. Index 9 is used when the split has
# at least ten rows; shorter splits fall back to their last row instead of
# raising IndexError.
import pprint
row = train_dataset[min(9, len(train_dataset) - 1)]
pprint.pprint(row["prompt"])
pprint.pprint(row["chosen"])
pprint.pprint(row["rejected"])

# Same patch import/call as at the top of the script; it still runs before the
# TRL import below.
from unsloth import PatchDPOTrainer
PatchDPOTrainer()
from trl import DPOTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Precision flags are complementary: bf16 when supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Batch size 2 with 32 accumulation steps: 64 examples per device per
# optimizer step.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=32,
    gradient_checkpointing=True,
    warmup_steps=5,
    max_steps=1000,
    learning_rate=2.5e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.07,
    lr_scheduler_type="cosine",
    seed=3407,
    output_dir="outputs/dpo-out-13b",
    save_strategy="steps",
    save_steps=500,
)

# No separate reference model is passed (ref_model=None).
dpo_trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=training_args,
    beta=0.5,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    max_length=1024,
    max_prompt_length=512,
)

# Run DPO training.
dpo_trainer.train()

# Write the model and tokenizer with Unsloth's save_pretrained_merged using the
# "merged_16bit" save method. The directory name is defined once and reused
# for the upload.
merged_dir = "outputs/dpo-out-13b/merged"
model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")

# Publish the saved folder to a private model repo on the Hub.
# - The token comes from HF_TOKEN instead of a hard-coded empty string; None
#   means this script supplies no explicit token.
# - The same token is used for both repo creation and the upload.
# - exist_ok=True keeps a re-run from failing after training just because the
#   repo was already created by an earlier run.
hf_token = os.environ.get("HF_TOKEN") or None
repo_id = f"jic062/{repo_name}"
api = HfApi(token=hf_token)
create_repo(
    repo_id,
    repo_type="model",
    private=True,
    token=hf_token,
    exist_ok=True,
)
api.upload_folder(
    folder_path=merged_dir,
    repo_id=repo_id,
    repo_type="model",
)

# Close the Weights & Biases run opened at the start of the script.
wandb.finish()