# -*- coding: utf-8 -*-
import os, math, argparse, warnings
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
# ========== Evaluation core: left-padded encoding + reward_model inference ==========
def build_left_padded_inputs(tokenizer, texts, max_length, device):
    """Tokenize ``texts`` as a single left-padded batch and move it to ``device``.

    Side effects on ``tokenizer``: ``padding_side`` is set to ``"left"``. If no
    pad token id is configured, the EOS token is reused as the pad token; if
    there is no EOS id either, a new ``"<|pad|>"`` special token is registered.

    Args:
        tokenizer: Callable tokenizer exposing ``padding_side``,
            ``pad_token_id``, ``eos_token_id``, ``eos_token``, ``pad_token``
            and ``add_special_tokens``.
        texts: Batch of strings handed to the tokenizer as-is.
        max_length: Truncation limit forwarded to the tokenizer.
        device: Target passed to ``.to(...)`` on each returned tensor.

    Returns:
        A 2-tuple ``(input_ids, attention_mask)`` taken from the tokenizer
        output, each moved to ``device``.
    """
    tokenizer.padding_side = "left"

    # Fall back to a usable pad token when the tokenizer ships without one.
    lacks_pad = tokenizer.pad_token_id is None
    if lacks_pad and tokenizer.eos_token_id is not None:
        tokenizer.pad_token = tokenizer.eos_token
    elif lacks_pad:
        # NOTE(review): a freshly added token may have no embedding row in the
        # model — confirm the caller resizes embeddings if this branch is hit.
        tokenizer.add_special_tokens({"pad_token": "<|pad|>"})

    encoded = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    return tuple(encoded[key].to(device) for key in ("input_ids", "attention_mask"))
@torch.inference_mode()
def score_texts_last_token(reward_model, tokenizer, texts, max_length, device):
    """Score each text with the reward model, reading the last sequence position.

    The batch is encoded with ``build_left_padded_inputs`` (left padding), so
    index ``-1`` along the sequence axis is intended to be each text's final
    real token rather than padding.

    Args:
        reward_model: Object exposing ``.model(input_ids, attention_mask)``
            whose result has ``last_hidden_state``, and ``.score(hidden)``.
            Per the original author's notes the hidden states are [B, T, H]
            and the score output is [B, T] or [B, T, 1] — TODO confirm for
            the model actually loaded.
        tokenizer: Tokenizer forwarded to ``build_left_padded_inputs``.
        texts: Batch of strings to score.
        max_length: Truncation limit forwarded to the tokenizer.
        device: Device the encoded tensors are moved to.

    Returns:
        A Python list with the last-position score of every text, in input
        order. NaN scores are replaced by ``-1e30``.
    """
    input_ids, attention_mask = build_left_padded_inputs(
        tokenizer, texts, max_length, device
    )

    hidden_states = reward_model.model(input_ids, attention_mask).last_hidden_state
    per_token = reward_model.score(hidden_states)

    # Drop a trailing singleton label axis so that indexing below yields [B].
    if per_token.dim() == 3 and per_token.size(-1) == 1:
        per_token = per_token.squeeze(-1)

    # Last position only; NaN is mapped to a very low score.
    final_scores = torch.nan_to_num(per_token[:, -1], nan=-1e30)
    return final_scores.detach().float().cpu().tolist()
# ========== Data joining ==========
def join_prompt_answer(prompt, answer, joiner="\n"):
    """Concatenate a prompt and an answer with ``joiner`` between them.

    Falsy inputs (``None`` or ``""``) are treated as empty strings. Trailing
    whitespace is stripped from each part before joining; leading whitespace
    is kept.
    """
    head, tail = (
        (piece if piece else "").rstrip() for piece in (prompt, answer)
    )
    return "{}{}{}".format(head, joiner, tail)
# ========== Main flow ==========
def main():
    """Evaluate pairwise accuracy of a reward model on chosen/reject pairs.

    Reads a parquet file with the columns ``chosen_prompt``, ``chosen`` and
    ``reject``. For every row it scores ``prompt + chosen`` and
    ``prompt + reject`` with ``score_texts_last_token`` and counts the row as
    correct when the chosen score is strictly greater than the reject score.
    Per-sample results, the running accuracy and the final accuracy are
    printed to stdout.

    Command-line arguments:
        --data_path: Parquet file to evaluate (required).
        --model_path: Path or identifier passed to ``from_pretrained`` for
            both the model and the tokenizer.
        --batch_size: Number of rows (pairs) scored per forward pass.
        --max_length: Truncation limit forwarded to the tokenizer.
        --joiner: String inserted between prompt and answer. The CLI default
            is the empty string.

    Raises:
        FileNotFoundError: If ``--data_path`` does not exist.
        ValueError: If a required column is missing from the parquet file.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--data_path", type=str, required=True,
                    help="包含列 chosen_prompt/chosen/reject 的 parquet 路径")
    ap.add_argument("--model_path", type=str, default="/home/rm5.0_9e-6",
                    help="reward model / tokenizer path passed to from_pretrained")
    ap.add_argument("--batch_size", type=int, default=16)
    ap.add_argument("--max_length", type=int, default=1024)
    ap.add_argument("--joiner", type=str, default="")
    args = ap.parse_args()

    # A zero step makes range() raise and a negative one silently skips every
    # batch, so reject both up front with a clear usage error.
    if args.batch_size <= 0:
        ap.error("--batch_size must be a positive integer")

    if not os.path.exists(args.data_path):
        raise FileNotFoundError(args.data_path)
    df = pd.read_parquet(args.data_path)
    for col in ("chosen_prompt", "chosen", "reject"):
        if col not in df.columns:
            raise ValueError(f"缺列 `{col}`,实际列:{list(df.columns)}")

    total = len(df)
    print(f"Loaded {total} samples from {args.data_path}")
    if total == 0:
        # Nothing to score: avoid loading the model and dividing by zero below.
        print("No samples to evaluate; skipping model loading.")
        return

    # TODO: swap in the project's own reward-model / tokenizer loader if it
    # differs from the plain Hugging Face sequence-classification loader.
    reward_model = AutoModelForSequenceClassification.from_pretrained(
        args.model_path,
        num_labels=1,
        torch_dtype=torch.bfloat16,
        use_cache=False,
    )
    tokenizer = AutoTokenizer.from_pretrained(args.model_path)
    # Inputs are moved to wherever the model's parameters were loaded.
    device = next(reward_model.parameters()).device

    correct = 0
    seen = 0
    print("Start evaluating (pairwise chosen vs reject)...\n" + "-" * 70)

    # Each row contributes two texts, interleaved as chosen, reject, chosen, ...
    for start in range(0, total, args.batch_size):
        batch = df.iloc[start:start + args.batch_size]

        pair_texts = []
        for prompt, chosen, reject in zip(
            batch["chosen_prompt"], batch["chosen"], batch["reject"]
        ):
            pair_texts.append(join_prompt_answer(prompt, chosen, args.joiner))
            pair_texts.append(join_prompt_answer(prompt, reject, args.joiner))

        scores = score_texts_last_token(
            reward_model=reward_model,
            tokenizer=tokenizer,
            texts=pair_texts,
            max_length=args.max_length,
            device=device,
        )

        # De-interleave: even positions are chosen scores, odd are reject.
        for chosen_score, reject_score in zip(scores[0::2], scores[1::2]):
            seen += 1
            is_correct = chosen_score > reject_score
            correct += int(is_correct)
            running_acc = correct / seen
            print(
                f"[{seen:6d}] "
                f"Chosen={chosen_score:.6f} | Reject={reject_score:.6f} | "
                f"Correct={is_correct} | RunningAcc={running_acc*100:.2f}%"
            )

    print("\n" + "-" * 70)
    print(f"Finished. Total={seen}, Correct={correct}, FinalAcc={correct/seen*100:.2f}%")
# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()