import random

import torch
import wandb
from datasets import load_dataset
from huggingface_hub import HfApi, create_repo
from unsloth import FastLanguageModel

max_seq_length = 2048
dtype = None          # None lets Unsloth auto-detect: bfloat16 on Ampere+, else float16
load_in_4bit = True   # load the base weights 4-bit quantized (QLoRA-style fine-tuning)
repo_name = "instruct-v19"

wandb.init(
    project = "unsloth_lora",
    name = repo_name,
)

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = "",  # Hugging Face token with access to the gated Llama 3.1 repo
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 64,  # LoRA rank
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",  # Unsloth's offloaded checkpointing; saves VRAM
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)
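# Optional sanity check, not in the original script: get_peft_model returns a
# PEFT-wrapped model, so the standard PEFT helper below reports how many
# parameters the LoRA adapters actually leave trainable.
model.print_trainable_parameters()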

dataset = load_dataset("Chaser-cz/ChaiTop100-SHAREGPT")
train_dataset = dataset["train"].shuffle(seed = random.randint(1, 9999))

from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3",
    # Map the dataset's ShareGPT-style keys ("from"/"value", "human"/"gpt")
    # onto the role/content schema the Llama 3 template expects.
    mapping = {"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
)

def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [
        tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False)
        for convo in convos
    ]
    return {"text": texts}

train_dataset = train_dataset.map(formatting_prompts_func, batched = True)
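# Optional sanity check, not in the original script: print one formatted example
# to confirm the Llama 3 special tokens (<|start_header_id|>, <|eot_id|>, ...)
# were applied before committing to a full training run.
print(train_dataset[0]["text"][:500])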

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 32,   # effective batch size: 2 * 32 = 64
        warmup_steps = 5,
        max_steps = 1000,
        learning_rate = 2.5e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "cosine",
        seed = 3407,
        output_dir = "outputs/lora-out-8b",
        save_strategy = "steps",
        save_steps = 500,
    ),
)

trainer_stats = trainer.train()

# Merge the LoRA adapters into the base weights and save a standalone
# 16-bit checkpoint that can be loaded without PEFT.
model.save_pretrained_merged("outputs/lora-out-8b/merged", tokenizer, save_method = "merged_16bit")
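# Optional smoke test, not in the original script: flip the PEFT model into
# Unsloth's fast inference mode and generate one reply, to catch a broken chat
# template or a degenerate checkpoint before uploading anything. The
# ShareGPT-style keys ("from"/"value") match the mapping passed to
# get_chat_template above.
FastLanguageModel.for_inference(model)
messages = [{"from": "human", "value": "Introduce yourself in one sentence."}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize = True, add_generation_prompt = True, return_tensors = "pt"
).to("cuda")
generated = model.generate(input_ids = input_ids, max_new_tokens = 64, use_cache = True)
print(tokenizer.decode(generated[0], skip_special_tokens = True))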

api = HfApi()
create_repo(f"jic062/{repo_name}", repo_type = "model", private = True, token = "")  # token needs write access
api.upload_folder(
    folder_path = "outputs/lora-out-8b/merged",
    repo_id = f"jic062/{repo_name}",
    repo_type = "model",
)
wandb.finish()