File size: 3,791 Bytes
9ae43b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9df7d3
9ae43b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bbd250
 
 
 
 
 
 
 
 
 
 
 
 
 
c9df7d3
6bbd250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e929806
6bbd250
 
 
9ae43b7
 
 
 
 
 
 
74060e3
 
 
9ae43b7
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
---
license: apache-2.0
datasets:
- beomi/KoAlpaca-RealQA
language:
- ko
base_model:
- Qwen/Qwen2.5-Coder-1.5B-Instruct
pipeline_tag: text-generation
---

# Model Description
Qwen/Qwen2.5-Coder-1.5B-Instruct๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ PEFT๋ฅผ ์ด์šฉํ•˜์—ฌ QLoRA (4-bit quantization + PEFT)ํ•ด๋ณธ ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค.

ํ•™์Šต ๋ฐ์ดํ„ฐ๋Š” beomi/KoAlpaca-RealQA๋ฅผ ์‚ฌ์šฉํ•˜์˜€์Šต๋‹ˆ๋‹ค.

์ž‘์€ ๋ชจ๋ธ์„ ์ด์šฉํ•˜์—ฌ QLoRA๋ฅผ ํ•œ ๊ฒƒ์ด๋‹ค ๋ณด๋‹ˆ ์–‘์งˆ์˜ output์ด ๋‚˜์˜ค์ง€๋Š” ์•Š์ง€๋งŒ QLoRA๋ชจ๋ธ๊ณผ ์›๋ณธ๋ชจ๋ธ์˜ ๋‹ต๋ณ€์ด ์ฐจ์ด๋Š” ํ™•์‹คํžˆ ์žˆ์—ˆ์Šต๋‹ˆ๋‹ค.

# Quantization Configuration
```python
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
```

# LoRA Configuration
```python
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["c_attn", "q_proj", "v_proj"]
)
```

# Training Arguments
```python
training_args = TrainingArguments(
    num_train_epochs=8,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    evaluation_strategy="steps",
    eval_steps=300,
    save_strategy="steps",
    save_steps=300,
    logging_steps=300,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False
)
```

# Training Progress
| Step | Training Loss | Validation Loss |
|------|---------------|-----------------|
| 300  | 1.595000      | 1.611501        |
| 600  | 1.593300      | 1.596210        |
| 900  | 1.577600      | 1.586121        |
| 1200 | 1.564600      | 1.577804        |
| ...  | ...           | ...             |
| 7200 | 1.499700      | 1.525933        |
| 7500 | 1.493400      | 1.525612        |
| 7800 | 1.491000      | 1.525330        |
| 8100 | 1.499900      | 1.525138        |


# ์‹คํ–‰ ์ฝ”๋“œ
```python
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Quantization config (must match QLoRA settings used during fine-tuning)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load tokenizer and model (local or hub path)
model_path = "onebeans/Qwen2.5-Coder-KoInstruct-QLoRA"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map="auto"
)
model.eval()

# Define prompt using ChatML format (Qwen-style)
def build_chatml_prompt(question: str) -> str:
    system_msg = "<|im_start|>system\n๋‹น์‹ ์€ ์œ ์šฉํ•œ ํ•œ๊ตญ์–ด ๋„์šฐ๋ฏธ์ž…๋‹ˆ๋‹ค.<|im_end|>\n"
    user_msg = f"<|im_start|>user\n{question}<|im_end|>\n"
    return system_msg + user_msg + "<|im_start|>assistant\n"

# Run inference
def generate_response(question: str, max_new_tokens: int = 128) -> str:
    prompt = build_chatml_prompt(question)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding; sampling params like top_p/temperature are ignored when do_sample=False
            eos_token_id=tokenizer.eos_token_id,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example
question = "ํ•œ๊ตญ์˜ ์ˆ˜๋„๋Š” ์–ด๋””์ธ๊ฐ€์š”?" # ๊ธฐ์กด ๋ชจ๋ธ(Qwen/Qwen2.5-Coder-1.5B-Instruct)์˜ ์‘๋‹ต -> ํ•œ๊ตญ์˜ ์ˆ˜๋„๋Š” ์„œ์šธ์ž…๋‹ˆ๋‹ค.
response = generate_response(question)
print("๋ชจ๋ธ ์‘๋‹ต:\n", response)
```

# ์‹คํ–‰ํ™˜๊ฒฝ

Windows 10

NVIDIA GeForce RTX 4070 Ti 

# Framework Versions 

Python: 3.10.14

PyTorch: 1.12.1

Transformers: 4.46.2

Datasets: 3.2.0

Tokenizers: 0.20.3

PEFT: 0.8.2