---
license: apache-2.0
datasets:
- beomi/KoAlpaca-RealQA
language:
- ko
base_model:
- Qwen/Qwen2.5-Coder-1.5B-Instruct
pipeline_tag: text-generation
---
# Model Description
Qwen/Qwen2.5-Coder-1.5B-Instruct์ ๊ธฐ๋ฐ์ผ๋ก PEFT๋ฅผ ์ด์ฉํ์ฌ QLoRA (4-bit quantization + PEFT)ํด๋ณธ ๋ชจ๋ธ์
๋๋ค.
ํ์ต ๋ฐ์ดํฐ๋ beomi/KoAlpaca-RealQA๋ฅผ ์ฌ์ฉํ์์ต๋๋ค.
์์ ๋ชจ๋ธ์ ์ด์ฉํ์ฌ QLoRA๋ฅผ ํ ๊ฒ์ด๋ค ๋ณด๋ ์์ง์ output์ด ๋์ค์ง๋ ์์ง๋ง QLoRA๋ชจ๋ธ๊ณผ ์๋ณธ๋ชจ๋ธ์ ๋ต๋ณ์ด ์ฐจ์ด๋ ํ์คํ ์์์ต๋๋ค.
# Quantization Configuration
```python
# 4-bit quantization settings used when loading the base model for QLoRA.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load the weights in 4-bit
    bnb_4bit_quant_type="nf4",             # NF4 quantization type
    bnb_4bit_use_double_quant=True,        # double quantization enabled
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype is float16
)
```
# LoRA Configuration
```python
# LoRA adapter settings for causal-LM fine-tuning.
lora_config = LoraConfig(
    r=8,                    # adapter rank
    lora_alpha=32,          # LoRA alpha
    lora_dropout=0.05,      # dropout applied inside the adapter
    bias="none",            # no bias terms are trained
    task_type="CAUSAL_LM",
    # NOTE(review): Qwen2-family attention projections are presumably named
    # q_proj/k_proj/v_proj/o_proj, so "c_attn" likely matches no module here
    # -- confirm against the base model before reusing this list.
    target_modules=["c_attn", "q_proj", "v_proj"],
)
```
# Training Arguments
```python
# Trainer settings: evaluate, checkpoint and log on the same 300-step cadence,
# then restore the checkpoint with the lowest validation loss.
training_args = TrainingArguments(
    num_train_epochs=8,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,     # 4 x 4 = 16 samples per optimizer step per device
    # NOTE(review): newer Transformers releases presumably spell this
    # `eval_strategy` -- confirm against the pinned version before reuse.
    evaluation_strategy="steps",
    eval_steps=300,
    save_strategy="steps",
    save_steps=300,
    logging_steps=300,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,           # lower eval_loss is better
)
```
# Training Progress
| Step | Training Loss | Validation Loss |
|------|---------------|-----------------|
| 300 | 1.595000 | 1.611501 |
| 600 | 1.593300 | 1.596210 |
| 900 | 1.577600 | 1.586121 |
| 1200 | 1.564600 | 1.577804 |
| ... | ... | ... |
| 7200 | 1.499700 | 1.525933 |
| 7500 | 1.493400 | 1.525612 |
| 7800 | 1.491000 | 1.525330 |
| 8100 | 1.499900 | 1.525138 |
# 실행 코드
```python
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
# Quantization config (must match QLoRA settings used during fine-tuning):
# 4-bit NF4 weights, double quantization, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
# Load tokenizer and model (local or hub path); both come from the same
# location so the vocabulary matches the fine-tuned weights.
model_path = "onebeans/Qwen2.5-Coder-KoInstruct-QLoRA"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,  # reuse the 4-bit settings defined above
    device_map="auto",
)
# Inference only: switch the model to evaluation mode.
model.eval()
# Define prompt using ChatML format (Qwen-style)
def build_chatml_prompt(question: str) -> str:
    """Wrap a user question in a single-turn ChatML prompt.

    The prompt consists of a fixed Korean system message (in English: "You
    are a helpful Korean-language assistant."), the user's question, and an
    opened assistant turn for the model to continue.

    Args:
        question: The user question; inserted verbatim into the user turn.

    Returns:
        The complete prompt string, ending with the opening tag of the
        assistant turn.
    """
    # The system message must stay on one line: the literal was previously
    # mis-encoded and split in the middle, which made it unterminated.
    system_msg = "<|im_start|>system\n당신은 유용한 한국어 도우미입니다.<|im_end|>\n"
    user_msg = f"<|im_start|>user\n{question}<|im_end|>\n"
    return system_msg + user_msg + "<|im_start|>assistant\n"
# Run inference
def generate_response(question: str, max_new_tokens: int = 128) -> str:
    """Generate text for a single question using greedy decoding.

    Args:
        question: The user question; it is wrapped into a ChatML prompt by
            ``build_chatml_prompt`` before tokenization.
        max_new_tokens: Upper bound passed to ``model.generate`` for the
            number of newly generated tokens.

    Returns:
        ``outputs[0]`` decoded with special tokens skipped.
        NOTE(review): for causal LMs ``generate`` presumably returns the
        prompt tokens followed by the continuation, so the returned text
        would include the prompt as well -- confirm if only the answer is
        wanted.
    """
    prompt = build_chatml_prompt(question)
    # Move the encoded prompt to whichever device the model was placed on.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            # Greedy decoding. The former top_p/temperature arguments were
            # dropped: they only take effect when do_sample=True, so they
            # never influenced the output and merely contradicted this flag.
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Example
# The question asks "What is the capital of Korea?".
# For comparison, the base model (Qwen/Qwen2.5-Coder-1.5B-Instruct) answers
# "한국의 수도는 서울입니다." ("The capital of Korea is Seoul.")
question = "한국의 수도는 어디인가요?"
response = generate_response(question)
print("모델 응답:\n", response)  # the label reads "Model response:"
```
# 실행환경
Windows 10
NVIDIA GeForce RTX 4070 Ti
# Framework Versions
Python: 3.10.14
PyTorch: 1.12.1
Transformers: 4.46.2
Datasets: 3.2.0
Tokenizers: 0.20.3
PEFT: 0.8.2