!pip3 install -q -U transformers==4.38.2
!pip3 install -q -U datasets==2.18.0
!pip3 install -q -U bitsandbytes==0.42.0
!pip3 install -q -U peft==0.9.0
!pip3 install -q -U trl==0.7.11
!pip3 install -q -U accelerate==0.27.2

import torch
from datasets import Dataset, load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, TrainingArguments
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from huggingface_hub import notebook_login
notebook_login()
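If the interactive widget is inconvenient (for example, in a scripted run), a token-based login also works. A minimal sketch, assuming you have exported an access token under the hypothetical environment variable name HF_TOKEN:

import os
from huggingface_hub import login

login(token=os.environ["HF_TOKEN"])  # HF_TOKEN is an assumed variable name; set it yourself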
from datasets import load_dataset
dataset = load_dataset("daekeun-ml/naver-news-summarization-ko")

dataset

dataset['train'][0]
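The two fields used throughout are document (the article body) and summary (the reference summary). Since training later truncates at max_seq_length=512 tokens, it helps to get a rough sense of article lengths first; a minimal sketch using character counts as a cheap proxy for token counts:

docs = dataset['train']['document']
lengths = [len(d) for d in docs]  # character lengths, not token lengths
print(f"{len(docs)} documents; avg {sum(lengths) / len(lengths):.0f} chars; max {max(lengths)} chars")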
BASE_MODEL = "google/gemma-2b-it"

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map={"":0})
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, add_special_tokens=True)

doc = dataset['train']['document'][0]

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
messages = [
    {
        "role": "user",
        # "다음 글을 요약해주세요" = "Please summarize the following text"
        "content": "다음 글을 요약해주세요:\n\n{}".format(doc)
    }
]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

prompt
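With the stock gemma-2b-it chat template, the rendered prompt should look roughly like the following (exact whitespace may differ); note the trailing open model turn that add_generation_prompt=True appends:

<bos><start_of_turn>user
다음 글을 요약해주세요:

{article text}<end_of_turn>
<start_of_turn>model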
outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.2,
    top_k=50,
    top_p=0.95,
    add_special_tokens=True
)

print(outputs[0]["generated_text"][len(prompt):])
Note: Because of Colab's GPU memory limits, you can only proceed with fine-tuning after freeing the memory used for inference above. Restart the notebook runtime session, re-run sections 1 and 2 up to item 2.1 to reload, and then continue with the steps below.
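If you would rather not restart the runtime, releasing the Python references and clearing the CUDA cache can sometimes free enough memory. This is a sketch of an alternative, not the procedure above, and memory fragmentation may still force a restart:

import gc

del model, pipe  # drop the objects held by the inference section
gc.collect()
torch.cuda.empty_cache()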
!nvidia-smi

# Build each training example as a full Gemma chat exchange:
# user turn = "다음 글을 요약해주세요" ("Please summarize the following text") plus the article,
# model turn = the reference summary.
def generate_prompt(example):
    prompt_list = []
    for i in range(len(example['document'])):
        prompt_list.append(r"""<bos><start_of_turn>user
다음 글을 요약해주세요:

{}<end_of_turn>
<start_of_turn>model
{}<end_of_turn><eos>""".format(example['document'][i], example['summary'][i]))
    return prompt_list

train_data = dataset['train']
print(generate_prompt(train_data[:1])[0])
lora_config = LoraConfig(
    r=6,  # rank of the low-rank adapter matrices
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",  # NormalFloat4 quantization for the frozen base weights
    bnb_4bit_compute_dtype=torch.float16
)
BASE_MODEL = "google/gemma-2b-it"
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto", quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, add_special_tokens=True)
tokenizer.padding_side = 'right'
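As a quick sanity check that the 4-bit load took effect, get_memory_footprint() reports how much memory the weights occupy; it should come out far below the full-precision load used earlier:

print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")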
trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    max_seq_length=512,
    args=TrainingArguments(
        output_dir="outputs",
        max_steps=3000,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        optim="paged_adamw_8bit",
        warmup_ratio=0.03,  # warmup_steps expects an integer; 0.03 was meant as a ratio
        learning_rate=2e-4,
        fp16=True,
        logging_steps=100,
        push_to_hub=False,
        report_to='none',
    ),
    peft_config=lora_config,
    formatting_func=generate_prompt,
)
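Since SFTTrainer applies peft_config internally, trainer.model is now a PeftModel, and printing its trainable-parameter count confirms that only the small LoRA adapters (typically a fraction of a percent of all weights) will be updated:

trainer.model.print_trainable_parameters()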
trainer.train()

ADAPTER_MODEL = "lora_adapter"

trainer.model.save_pretrained(ADAPTER_MODEL)

!ls -alh lora_adapter
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map='auto', torch_dtype=torch.float16)
model = PeftModel.from_pretrained(model, ADAPTER_MODEL, device_map='auto', torch_dtype=torch.float16)

model = model.merge_and_unload()
model.save_pretrained('gemma-2b-it-sum-ko')
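The merged directory holds only model weights; saving the tokenizer next to them (a small addition, not in the original notebook) makes the folder self-contained for later loading:

tokenizer.save_pretrained('gemma-2b-it-sum-ko')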
!ls -alh ./gemma-2b-it-sum-ko
Note: Likewise, because of Colab's GPU memory limits, you need to free the memory used during training before you can try out the fine-tuned model. Restart the notebook runtime session, re-run sections 1 and 2 up to item 2.1 to reload, and then continue with the steps below (the cache-clearing sketch shown earlier may help here as well).
!nvidia-smi

BASE_MODEL = "google/gemma-2b-it"
FINETUNE_MODEL = "./gemma-2b-it-sum-ko"

finetune_model = AutoModelForCausalLM.from_pretrained(FINETUNE_MODEL, device_map={"":0})
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, add_special_tokens=True)
pipe_finetuned = pipeline("text-generation", model=finetune_model, tokenizer=tokenizer, max_new_tokens=512)

doc = dataset['test']['document'][10]
messages = [
    {
        "role": "user",
        # "다음 글을 요약해주세요" = "Please summarize the following text"
        "content": "다음 글을 요약해주세요:\n\n{}".format(doc)
    }
]
prompt = pipe_finetuned.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipe_finetuned(
    prompt,
    do_sample=True,
    temperature=0.2,
    top_k=50,
    top_p=0.95,
    add_special_tokens=True
)
print(outputs[0]["generated_text"][len(prompt):])
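To eyeball the result, print the dataset's reference summary for the same test example; a systematic evaluation would use a metric such as ROUGE, but a side-by-side look is a reasonable first check:

print("Reference summary:")
print(dataset['test']['summary'][10])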