Update README.md
Browse files
README.md
CHANGED
|
@@ -19,12 +19,61 @@ It has been trained using [TRL](https://github.com/huggingface/trl).
|
|
| 19 |
## Quick start
|
| 20 |
|
| 21 |
```python
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
text = "The capital of France is Paris."
|
| 25 |
-
rewarder = pipeline(model="None", device="cuda")
|
| 26 |
-
output = rewarder(text)[0]
|
| 27 |
-
print(output["score"])
|
| 28 |
```
|
| 29 |
|
| 30 |
## Training procedure
|
|
|
|
| 19 |
## Quick start
|
| 20 |
|
| 21 |
```python
|
| 22 |
+
import torch
|
| 23 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 24 |
+
from peft import PeftModel, PeftConfig
|
| 25 |
+
|
| 26 |
+
# -----------------------------
|
| 27 |
+
# 1. Define PEFT model ID & Checkpoint (Epoch)
|
| 28 |
+
# -----------------------------
|
| 29 |
+
peft_model_id = "xxccho/margin_reg_baseline_code"
|
| 30 |
+
|
| 31 |
+
# [ Checkpoints to Epochs Mapping ]
|
| 32 |
+
# Epoch 1 : "checkpoint-246"
|
| 33 |
+
# Epoch 2 : "checkpoint-492"
|
| 34 |
+
# Epoch 3 : "checkpoint-738"
|
| 35 |
+
# Epoch 4 : "checkpoint-984"
|
| 36 |
+
# Epoch 5 : "checkpoint-1230"
|
| 37 |
+
# Epoch 6 : "checkpoint-1476"
|
| 38 |
+
# Epoch 7 : "checkpoint-1722"
|
| 39 |
+
# Epoch 8 : "checkpoint-1968"
|
| 40 |
+
# Epoch 9 : "checkpoint-2214"
|
| 41 |
+
# Epoch 10 : "checkpoint-2460"
|
| 42 |
+
|
| 43 |
+
# Example: to use the epoch-5 checkpoint, set checkpoint = "checkpoint-1230"; if None, the final (epoch-10) model is loaded
|
| 44 |
+
checkpoint = None
|
| 45 |
+
|
| 46 |
+
# 2. Load the PEFT config
|
| 47 |
+
config = PeftConfig.from_pretrained(peft_model_id, subfolder=checkpoint) if checkpoint else PeftConfig.from_pretrained(peft_model_id)
|
| 48 |
+
|
| 49 |
+
# 3. Load tokenizer
|
| 50 |
+
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
|
| 51 |
+
if tokenizer.pad_token is None:
|
| 52 |
+
tokenizer.pad_token = tokenizer.eos_token
|
| 53 |
+
|
| 54 |
+
# 4. Load base model
|
| 55 |
+
base_model = AutoModelForSequenceClassification.from_pretrained(
|
| 56 |
+
config.base_model_name_or_path,
|
| 57 |
+
num_labels=1,
|
| 58 |
+
torch_dtype=torch.bfloat16,
|
| 59 |
+
device_map="auto"
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# 5. Apply LoRA adapter
|
| 63 |
+
model = PeftModel.from_pretrained(base_model, peft_model_id, subfolder=checkpoint) if checkpoint else PeftModel.from_pretrained(base_model, peft_model_id)
|
| 64 |
+
model.config.pad_token_id = tokenizer.pad_token_id
|
| 65 |
+
model.eval()
|
| 66 |
+
|
| 67 |
+
# Example Usage
|
| 68 |
+
text = "User: Write a python code for calculating fibonacci sequence.\nAssistant: Here is the code..."
|
| 69 |
+
inputs = tokenizer(text, return_tensors="pt").to(model.device)
|
| 70 |
+
|
| 71 |
+
with torch.no_grad():
|
| 72 |
+
outputs = model(**inputs)
|
| 73 |
+
reward_score = outputs.logits.squeeze().item()
|
| 74 |
+
|
| 75 |
+
print(f"[Code] Reward Score: {reward_score:.4f}")
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
```
|
| 78 |
|
| 79 |
## Training procedure
|